Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.cc | 260
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.h | 2
-rw-r--r--  gcc/config/aarch64/aarch64-cc-fusion.cc | 1
-rw-r--r--  gcc/config/aarch64/aarch64-cores.def | 1
-rw-r--r--  gcc/config/aarch64/aarch64-cost-tables.h | 2
-rw-r--r--  gcc/config/aarch64/aarch64-early-ra.cc | 7
-rw-r--r--  gcc/config/aarch64/aarch64-option-extensions.def | 2
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h | 114
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtin-types.def | 2
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def | 6
-rw-r--r--  gcc/config/aarch64/aarch64-simd-pragma-builtins.def | 33
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md | 141
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.cc | 192
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.def | 31
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-base.h | 2
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sme.def | 30
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sve2.cc | 72
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sve2.def | 50
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins-sve2.h | 2
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.cc | 142
-rw-r--r--  gcc/config/aarch64/aarch64-sve-builtins.h | 17
-rw-r--r--  gcc/config/aarch64/aarch64-sve.md | 83
-rw-r--r--  gcc/config/aarch64/aarch64-sve2.md | 69
-rw-r--r--  gcc/config/aarch64/aarch64-tune.md | 2
-rw-r--r--  gcc/config/aarch64/aarch64.cc | 856
-rw-r--r--  gcc/config/aarch64/aarch64.h | 22
-rw-r--r--  gcc/config/aarch64/aarch64.md | 57
-rw-r--r--  gcc/config/aarch64/arm_neon.h | 7
-rw-r--r--  gcc/config/aarch64/arm_private_fp8.h | 2
-rw-r--r--  gcc/config/aarch64/constraints.md | 16
-rw-r--r--  gcc/config/aarch64/driver-aarch64.cc | 14
-rw-r--r--  gcc/config/aarch64/iterators.md | 161
-rw-r--r--  gcc/config/aarch64/predicates.md | 18
-rw-r--r--  gcc/config/aarch64/tuning_models/fujitsu_monaka.h | 65
-rw-r--r--  gcc/config/aarch64/tuning_models/generic.h | 4
-rw-r--r--  gcc/config/aarch64/tuning_models/generic_armv9_a.h | 14
-rw-r--r--  gcc/config/aarch64/tuning_models/neoversev2.h | 15
-rw-r--r--  gcc/config/alpha/alpha.cc | 36
-rw-r--r--  gcc/config/alpha/driver-alpha.cc | 2
-rw-r--r--  gcc/config/alpha/elf.h | 2
-rw-r--r--  gcc/config/alpha/vms.h | 4
-rw-r--r--  gcc/config/arc/arc.cc | 17
-rw-r--r--  gcc/config/arc/arc.h | 14
-rw-r--r--  gcc/config/arc/arc.opt | 4
-rw-r--r--  gcc/config/arc/simdext.md | 8
-rw-r--r--  gcc/config/arm/aarch-common.cc | 1
-rw-r--r--  gcc/config/arm/aout.h | 12
-rw-r--r--  gcc/config/arm/arm-builtins.cc | 68
-rw-r--r--  gcc/config/arm/arm-mve-builtins-base.cc | 782
-rw-r--r--  gcc/config/arm/arm-mve-builtins-base.def | 64
-rw-r--r--  gcc/config/arm/arm-mve-builtins-base.h | 28
-rw-r--r--  gcc/config/arm/arm-mve-builtins-functions.h | 931
-rw-r--r--  gcc/config/arm/arm-mve-builtins-shapes.cc | 675
-rw-r--r--  gcc/config/arm/arm-mve-builtins-shapes.h | 10
-rw-r--r--  gcc/config/arm/arm-mve-builtins.cc | 115
-rw-r--r--  gcc/config/arm/arm-mve-builtins.def | 1
-rw-r--r--  gcc/config/arm/arm-mve-builtins.h | 14
-rw-r--r--  gcc/config/arm/arm-protos.h | 4
-rw-r--r--  gcc/config/arm/arm.cc | 280
-rw-r--r--  gcc/config/arm/arm.h | 22
-rw-r--r--  gcc/config/arm/arm_mve.h | 5387
-rw-r--r--  gcc/config/arm/arm_mve_builtins.def | 62
-rw-r--r--  gcc/config/arm/bpabi.h | 4
-rw-r--r--  gcc/config/arm/elf.h | 4
-rw-r--r--  gcc/config/arm/iterators.md | 105
-rw-r--r--  gcc/config/arm/linux-eabi.h | 7
-rw-r--r--  gcc/config/arm/mve.md | 1523
-rw-r--r--  gcc/config/arm/neon.md | 8
-rw-r--r--  gcc/config/arm/symbian.h | 6
-rw-r--r--  gcc/config/arm/unknown-elf.h | 2
-rw-r--r--  gcc/config/arm/unspecs.md | 29
-rw-r--r--  gcc/config/arm/vfp.md | 8
-rw-r--r--  gcc/config/arm/vxworks.h | 4
-rw-r--r--  gcc/config/avr/avr-arch.h | 5
-rw-r--r--  gcc/config/avr/avr-c.cc | 11
-rw-r--r--  gcc/config/avr/avr-devices.cc | 5
-rw-r--r--  gcc/config/avr/avr-dimode.md | 9
-rw-r--r--  gcc/config/avr/avr-fixed.md | 3
-rw-r--r--  gcc/config/avr/avr-log.cc | 6
-rw-r--r--  gcc/config/avr/avr-mcus.def | 2
-rw-r--r--  gcc/config/avr/avr-modes.def | 9
-rw-r--r--  gcc/config/avr/avr-passes.cc | 1939
-rw-r--r--  gcc/config/avr/avr-passes.def | 33
-rw-r--r--  gcc/config/avr/avr-protos.h | 34
-rw-r--r--  gcc/config/avr/avr.cc | 3245
-rw-r--r--  gcc/config/avr/avr.h | 23
-rw-r--r--  gcc/config/avr/avr.md | 801
-rw-r--r--  gcc/config/avr/avr.opt | 2
-rw-r--r--  gcc/config/avr/avrlibc.h | 3
-rw-r--r--  gcc/config/avr/constraints.md | 2
-rw-r--r--  gcc/config/avr/driver-avr.cc | 3
-rw-r--r--  gcc/config/avr/elf.h | 7
-rw-r--r--  gcc/config/avr/gen-avr-mmcu-specs.cc | 7
-rw-r--r--  gcc/config/avr/gen-avr-mmcu-texi.cc | 7
-rw-r--r--  gcc/config/avr/predicates.md | 33
-rw-r--r--  gcc/config/avr/ranges.h | 278
-rw-r--r--  gcc/config/avr/specs.h | 2
-rw-r--r--  gcc/config/avr/stdfix.h | 4
-rw-r--r--  gcc/config/avr/t-avr | 6
-rw-r--r--  gcc/config/bfin/bfin-protos.h | 2
-rw-r--r--  gcc/config/bfin/bfin.cc | 74
-rw-r--r--  gcc/config/bfin/bfin.h | 12
-rw-r--r--  gcc/config/bpf/bpf.cc | 1
-rw-r--r--  gcc/config/bpf/btfext-out.cc | 1
-rw-r--r--  gcc/config/bpf/core-builtins.cc | 1
-rw-r--r--  gcc/config/c6x/c6x.cc | 2
-rw-r--r--  gcc/config/c6x/c6x.md | 2
-rw-r--r--  gcc/config/cris/cris.cc | 4
-rw-r--r--  gcc/config/cris/cris.md | 47
-rw-r--r--  gcc/config/darwin-c.cc | 12
-rw-r--r--  gcc/config/darwin-driver.cc | 14
-rw-r--r--  gcc/config/darwin-protos.h | 8
-rw-r--r--  gcc/config/darwin.cc | 5
-rw-r--r--  gcc/config/darwin.h | 8
-rw-r--r--  gcc/config/elfos.h | 8
-rw-r--r--  gcc/config/epiphany/epiphany.cc | 2
-rw-r--r--  gcc/config/epiphany/epiphany.md | 16
-rw-r--r--  gcc/config/fr30/fr30.cc | 126
-rw-r--r--  gcc/config/fr30/fr30.h | 58
-rw-r--r--  gcc/config/freebsd-spec.h | 18
-rw-r--r--  gcc/config/freebsd.h | 6
-rw-r--r--  gcc/config/frv/frv.cc | 6
-rw-r--r--  gcc/config/ft32/ft32.cc | 13
-rw-r--r--  gcc/config/ft32/ft32.opt | 4
-rw-r--r--  gcc/config/gcn/gcn-devices.def | 196
-rw-r--r--  gcc/config/gcn/gcn-hsa.h | 35
-rw-r--r--  gcc/config/gcn/gcn-opts.h | 48
-rw-r--r--  gcc/config/gcn/gcn-run.cc | 2
-rw-r--r--  gcc/config/gcn/gcn-tables.opt | 58
-rw-r--r--  gcc/config/gcn/gcn-tables.opt.urls | 2
-rw-r--r--  gcc/config/gcn/gcn-tree.cc | 10
-rw-r--r--  gcc/config/gcn/gcn-valu.md | 84
-rw-r--r--  gcc/config/gcn/gcn.cc | 318
-rw-r--r--  gcc/config/gcn/gcn.h | 72
-rw-r--r--  gcc/config/gcn/gcn.md | 74
-rw-r--r--  gcc/config/gcn/gcn.opt | 40
-rw-r--r--  gcc/config/gcn/gen-gcn-device-macros.awk | 129
-rw-r--r--  gcc/config/gcn/gen-opt-tables.awk | 55
-rw-r--r--  gcc/config/gcn/mkoffload.cc | 259
-rw-r--r--  gcc/config/gcn/t-gcn-hsa | 7
-rw-r--r--  gcc/config/gcn/t-omp-device | 4
-rw-r--r--  gcc/config/h8300/h8300.cc | 42
-rw-r--r--  gcc/config/host-darwin.h | 2
-rw-r--r--  gcc/config/host-linux.cc | 2
-rw-r--r--  gcc/config/host-netbsd.cc | 2
-rw-r--r--  gcc/config/host-openbsd.cc | 2
-rw-r--r--  gcc/config/host-solaris.cc | 4
-rw-r--r--  gcc/config/i386/amxavx512intrin.h | 189
-rw-r--r--  gcc/config/i386/amxfp8intrin.h | 67
-rw-r--r--  gcc/config/i386/amxmovrsintrin.h | 111
-rw-r--r--  gcc/config/i386/amxtf32intrin.h | 47
-rw-r--r--  gcc/config/i386/amxtransposeintrin.h | 177
-rw-r--r--  gcc/config/i386/avx10_2-512convertintrin.h | 4
-rw-r--r--  gcc/config/i386/avx10_2bf16intrin.h | 4
-rw-r--r--  gcc/config/i386/avx10_2convertintrin.h | 6
-rw-r--r--  gcc/config/i386/avx512dqintrin.h | 2
-rw-r--r--  gcc/config/i386/avx512fp16intrin.h | 4
-rw-r--r--  gcc/config/i386/biarch64.h | 2
-rw-r--r--  gcc/config/i386/cmpccxaddintrin.h | 6
-rw-r--r--  gcc/config/i386/cpuid.h | 15
-rw-r--r--  gcc/config/i386/cygming.h | 2
-rw-r--r--  gcc/config/i386/cygwin.h | 2
-rw-r--r--  gcc/config/i386/djgpp.h | 2
-rw-r--r--  gcc/config/i386/driver-i386.cc | 7
-rw-r--r--  gcc/config/i386/freebsd.h | 6
-rw-r--r--  gcc/config/i386/gas.h | 2
-rw-r--r--  gcc/config/i386/gmm_malloc.h | 6
-rw-r--r--  gcc/config/i386/gnu-user.h | 8
-rw-r--r--  gcc/config/i386/host-cygwin.cc | 2
-rw-r--r--  gcc/config/i386/host-mingw32.cc | 20
-rw-r--r--  gcc/config/i386/i386-builtin-types.def | 9
-rw-r--r--  gcc/config/i386/i386-builtin.def | 36
-rw-r--r--  gcc/config/i386/i386-builtins.cc | 9
-rw-r--r--  gcc/config/i386/i386-c.cc | 12
-rw-r--r--  gcc/config/i386/i386-expand.cc | 281
-rw-r--r--  gcc/config/i386/i386-features.cc | 53
-rw-r--r--  gcc/config/i386/i386-isa.def | 6
-rw-r--r--  gcc/config/i386/i386-options.cc | 27
-rw-r--r--  gcc/config/i386/i386-opts.h | 2
-rw-r--r--  gcc/config/i386/i386-protos.h | 3
-rw-r--r--  gcc/config/i386/i386.cc | 382
-rw-r--r--  gcc/config/i386/i386.h | 60
-rw-r--r--  gcc/config/i386/i386.md | 193
-rw-r--r--  gcc/config/i386/i386.opt | 27
-rw-r--r--  gcc/config/i386/i386.opt.urls | 18
-rw-r--r--  gcc/config/i386/immintrin.h | 11
-rw-r--r--  gcc/config/i386/mmx.md | 290
-rw-r--r--  gcc/config/i386/movrsintrin.h | 453
-rw-r--r--  gcc/config/i386/openbsdelf.h | 4
-rw-r--r--  gcc/config/i386/predicates.md | 12
-rw-r--r--  gcc/config/i386/sm4intrin.h | 25
-rw-r--r--  gcc/config/i386/smmintrin.h | 2
-rw-r--r--  gcc/config/i386/sol2.h | 2
-rw-r--r--  gcc/config/i386/sse.md | 1040
-rw-r--r--  gcc/config/i386/stringop.def | 2
-rw-r--r--  gcc/config/i386/subst.md | 3
-rw-r--r--  gcc/config/i386/sync.md | 4
-rw-r--r--  gcc/config/i386/wmmintrin.h | 8
-rw-r--r--  gcc/config/i386/x86-tune-costs.h | 51
-rw-r--r--  gcc/config/i386/x86-tune-sched.cc | 67
-rw-r--r--  gcc/config/i386/x86-tune.def | 48
-rw-r--r--  gcc/config/i386/xmmintrin.h | 5
-rw-r--r--  gcc/config/ia64/freebsd.h | 2
-rw-r--r--  gcc/config/ia64/ia64.cc | 113
-rw-r--r--  gcc/config/ia64/ia64.md | 4
-rw-r--r--  gcc/config/ia64/predicates.md | 2
-rw-r--r--  gcc/config/iq2000/iq2000.cc | 106
-rw-r--r--  gcc/config/iq2000/iq2000.h | 6
-rw-r--r--  gcc/config/kopensolaris-gnu.h | 2
-rw-r--r--  gcc/config/lm32/lm32-protos.h | 4
-rw-r--r--  gcc/config/lm32/lm32.cc | 76
-rw-r--r--  gcc/config/lm32/lm32.h | 4
-rw-r--r--  gcc/config/loongarch/genopts/loongarch.opt.in | 4
-rw-r--r--  gcc/config/loongarch/loongarch-builtins.cc | 1
-rw-r--r--  gcc/config/loongarch/loongarch-c.cc | 2
-rw-r--r--  gcc/config/loongarch/loongarch-cpu.cc | 4
-rw-r--r--  gcc/config/loongarch/loongarch.cc | 1
-rw-r--r--  gcc/config/loongarch/loongarch.md | 12
-rw-r--r--  gcc/config/loongarch/loongarch.opt | 4
-rw-r--r--  gcc/config/loongarch/loongarch.opt.urls | 3
-rw-r--r--  gcc/config/m32c/m32c.cc | 10
-rw-r--r--  gcc/config/m32r/m32r.cc | 2
-rw-r--r--  gcc/config/m32r/m32r.h | 4
-rw-r--r--  gcc/config/m68k/linux.h | 2
-rw-r--r--  gcc/config/m68k/m68k.cc | 123
-rw-r--r--  gcc/config/m68k/m68k.md | 9
-rw-r--r--  gcc/config/m68k/m68kelf.h | 4
-rw-r--r--  gcc/config/m68k/netbsd-elf.h | 2
-rw-r--r--  gcc/config/m68k/predicates.md | 1
-rw-r--r--  gcc/config/mcore/mcore-elf.h | 6
-rw-r--r--  gcc/config/mcore/mcore.cc | 352
-rw-r--r--  gcc/config/mcore/mcore.h | 30
-rw-r--r--  gcc/config/microblaze/microblaze-c.cc | 8
-rw-r--r--  gcc/config/microblaze/microblaze-protos.h | 4
-rw-r--r--  gcc/config/microblaze/microblaze.cc | 122
-rw-r--r--  gcc/config/microblaze/microblaze.h | 34
-rw-r--r--  gcc/config/mingw/winnt-cxx.cc | 23
-rw-r--r--  gcc/config/mingw/winnt.cc | 21
-rw-r--r--  gcc/config/mips/frame-header-opt.cc | 2
-rw-r--r--  gcc/config/mips/loongson-mmi.md | 2
-rw-r--r--  gcc/config/mips/mips-msa.md | 128
-rw-r--r--  gcc/config/mips/mips-protos.h | 1
-rw-r--r--  gcc/config/mips/mips.cc | 52
-rw-r--r--  gcc/config/mips/mips.h | 2
-rw-r--r--  gcc/config/mips/mips.md | 4
-rw-r--r--  gcc/config/mips/sde.h | 2
-rw-r--r--  gcc/config/mmix/mmix.cc | 2
-rw-r--r--  gcc/config/mn10300/linux.h | 4
-rw-r--r--  gcc/config/mn10300/mn10300.cc | 36
-rw-r--r--  gcc/config/moxie/moxie.cc | 48
-rw-r--r--  gcc/config/moxie/moxie.h | 12
-rw-r--r--  gcc/config/msp430/driver-msp430.cc | 1
-rw-r--r--  gcc/config/netbsd.h | 2
-rw-r--r--  gcc/config/nios2/elf.h | 2
-rw-r--r--  gcc/config/nios2/nios2.cc | 116
-rw-r--r--  gcc/config/nios2/nios2.h | 6
-rw-r--r--  gcc/config/nvptx/gen-opt.sh | 14
-rw-r--r--  gcc/config/nvptx/mkoffload.cc | 26
-rw-r--r--  gcc/config/nvptx/nvptx-c.cc | 6
-rw-r--r--  gcc/config/nvptx/nvptx-gen.opt | 2
-rw-r--r--  gcc/config/nvptx/nvptx-opts.h | 4
-rw-r--r--  gcc/config/nvptx/nvptx.cc | 185
-rw-r--r--  gcc/config/nvptx/nvptx.opt | 9
-rw-r--r--  gcc/config/openbsd-stdint.h | 8
-rw-r--r--  gcc/config/openbsd.h | 38
-rw-r--r--  gcc/config/pa/pa-64.h | 12
-rw-r--r--  gcc/config/pa/pa.cc | 186
-rw-r--r--  gcc/config/pa/pa.h | 25
-rw-r--r--  gcc/config/pa/pa.md | 164
-rw-r--r--  gcc/config/pa/pa.opt | 4
-rw-r--r--  gcc/config/pa/pa.opt.urls | 2
-rw-r--r--  gcc/config/pa/pa32-regs.h | 2
-rw-r--r--  gcc/config/pa/predicates.md | 41
-rw-r--r--  gcc/config/pa/som.h | 4
-rw-r--r--  gcc/config/pdp11/pdp11.cc | 154
-rw-r--r--  gcc/config/pdp11/pdp11.h | 22
-rw-r--r--  gcc/config/riscv/autovec.md | 152
-rw-r--r--  gcc/config/riscv/constraints.md | 5
-rw-r--r--  gcc/config/riscv/iterators.md | 6
-rw-r--r--  gcc/config/riscv/predicates.md | 2
-rw-r--r--  gcc/config/riscv/riscv-avlprop.cc | 1
-rw-r--r--  gcc/config/riscv/riscv-c.cc | 7
-rw-r--r--  gcc/config/riscv/riscv-opts.h | 5
-rw-r--r--  gcc/config/riscv/riscv-protos.h | 16
-rw-r--r--  gcc/config/riscv/riscv-string.cc | 246
-rw-r--r--  gcc/config/riscv/riscv-subset.h | 3
-rw-r--r--  gcc/config/riscv/riscv-target-attr.cc | 65
-rw-r--r--  gcc/config/riscv/riscv-v.cc | 454
-rw-r--r--  gcc/config/riscv/riscv-v.h | 90
-rw-r--r--  gcc/config/riscv/riscv-vector-builtins-bases.cc | 6
-rw-r--r--  gcc/config/riscv/riscv-vector-builtins-shapes.cc | 2
-rw-r--r--  gcc/config/riscv/riscv-vector-builtins.cc | 1
-rw-r--r--  gcc/config/riscv/riscv-vector-costs.cc | 15
-rw-r--r--  gcc/config/riscv/riscv-vsetvl.cc | 20
-rw-r--r--  gcc/config/riscv/riscv.cc | 840
-rw-r--r--  gcc/config/riscv/riscv.h | 16
-rw-r--r--  gcc/config/riscv/riscv.md | 161
-rw-r--r--  gcc/config/riscv/riscv.opt | 8
-rw-r--r--  gcc/config/riscv/riscv_cmo.h | 84
-rw-r--r--  gcc/config/riscv/thead.cc | 4
-rw-r--r--  gcc/config/riscv/thead.md | 4
-rw-r--r-- [-rwxr-xr-x]  gcc/config/riscv/vector-crypto.md | 0
-rw-r--r--  gcc/config/riscv/vector-iterators.md | 386
-rw-r--r--  gcc/config/riscv/vector.md | 32
-rw-r--r--  gcc/config/riscv/xiangshan.md | 11
-rw-r--r--  gcc/config/riscv/zc.md | 4
-rw-r--r--  gcc/config/riscv/zicond.md | 112
-rw-r--r--  gcc/config/rl78/rl78.cc | 2
-rw-r--r--  gcc/config/rs6000/aix.h | 2
-rw-r--r--  gcc/config/rs6000/aix71.h | 4
-rw-r--r--  gcc/config/rs6000/altivec.h | 2
-rw-r--r--  gcc/config/rs6000/altivec.md | 55
-rw-r--r--  gcc/config/rs6000/amo.h | 2
-rw-r--r--  gcc/config/rs6000/darwin.h | 4
-rw-r--r--  gcc/config/rs6000/driver-rs6000.cc | 3
-rw-r--r--  gcc/config/rs6000/freebsd.h | 2
-rw-r--r--  gcc/config/rs6000/freebsd64.h | 2
-rw-r--r--  gcc/config/rs6000/host-darwin.cc | 13
-rw-r--r--  gcc/config/rs6000/linux.h | 2
-rw-r--r--  gcc/config/rs6000/mmintrin.h | 56
-rw-r--r--  gcc/config/rs6000/ppu_intrinsics.h | 20
-rw-r--r--  gcc/config/rs6000/predicates.md | 63
-rw-r--r--  gcc/config/rs6000/rs6000-c.cc | 3
-rw-r--r--  gcc/config/rs6000/rs6000-call.cc | 20
-rw-r--r--  gcc/config/rs6000/rs6000-internal.h | 2
-rw-r--r--  gcc/config/rs6000/rs6000-logue.cc | 8
-rw-r--r--  gcc/config/rs6000/rs6000-overload.def | 12
-rw-r--r--  gcc/config/rs6000/rs6000-p8swap.cc | 12
-rw-r--r--  gcc/config/rs6000/rs6000-string.cc | 6
-rw-r--r--  gcc/config/rs6000/rs6000.cc | 47
-rw-r--r--  gcc/config/rs6000/si2vmx.h | 144
-rw-r--r--  gcc/config/rs6000/spu2vmx.h | 14
-rw-r--r--  gcc/config/rs6000/vec_types.h | 4
-rw-r--r--  gcc/config/rs6000/xcoff.h | 6
-rw-r--r--  gcc/config/rtems.h | 2
-rw-r--r--  gcc/config/rx/rx.cc | 42
-rw-r--r--  gcc/config/s390/s390-c.cc | 1
-rw-r--r--  gcc/config/s390/s390-protos.h | 1
-rw-r--r--  gcc/config/s390/s390.cc | 75
-rw-r--r--  gcc/config/s390/s390.md | 15
-rw-r--r--  gcc/config/s390/s390.opt | 4
-rw-r--r--  gcc/config/s390/s390.opt.urls | 2
-rw-r--r--  gcc/config/s390/vector.md | 75
-rw-r--r--  gcc/config/sh/elf.h | 2
-rw-r--r--  gcc/config/sh/embed-elf.h | 2
-rw-r--r--  gcc/config/sh/netbsd-elf.h | 2
-rw-r--r--  gcc/config/sh/sh.cc | 74
-rw-r--r--  gcc/config/sh/sh.h | 4
-rw-r--r--  gcc/config/sh/sh_treg_combine.cc | 2
-rw-r--r--  gcc/config/sh/vxworks.h | 4
-rw-r--r--  gcc/config/sol2-cxx.cc | 1
-rw-r--r--  gcc/config/sol2.cc | 2
-rw-r--r--  gcc/config/sparc/constraints.md | 45
-rw-r--r--  gcc/config/sparc/freebsd.h | 4
-rw-r--r--  gcc/config/sparc/linux.h | 6
-rw-r--r--  gcc/config/sparc/linux64.h | 2
-rw-r--r--  gcc/config/sparc/sparc-protos.h | 2
-rw-r--r--  gcc/config/sparc/sparc.cc | 67
-rw-r--r--  gcc/config/sparc/sparc.h | 2
-rw-r--r--  gcc/config/sparc/sparc.md | 56
-rw-r--r--  gcc/config/sparc/sparc.opt | 4
-rw-r--r--  gcc/config/sparc/sparc.opt.urls | 3
-rw-r--r--  gcc/config/sparc/sysv4.h | 4
-rw-r--r--  gcc/config/stormy16/stormy16-protos.h | 6
-rw-r--r--  gcc/config/stormy16/stormy16.cc | 2
-rw-r--r--  gcc/config/stormy16/stormy16.h | 2
-rw-r--r--  gcc/config/v850/predicates.md | 2
-rw-r--r--  gcc/config/v850/v850-c.cc | 48
-rw-r--r--  gcc/config/v850/v850.cc | 180
-rw-r--r--  gcc/config/v850/v850.h | 34
-rw-r--r--  gcc/config/vax/vax.opt.urls | 2
-rw-r--r--  gcc/config/visium/visium.h | 4
-rw-r--r--  gcc/config/vms/vms-c.cc | 1
-rw-r--r--  gcc/config/vx-common.h | 2
-rw-r--r--  gcc/config/vxworks.cc | 16
-rw-r--r--  gcc/config/vxworksae.h | 2
-rw-r--r--  gcc/config/xtensa/constraints.md | 6
-rw-r--r--  gcc/config/xtensa/xtensa-dynconfig.cc | 1
-rw-r--r--  gcc/config/xtensa/xtensa.cc | 3
-rw-r--r--  gcc/config/xtensa/xtensa.md | 12
380 files changed, 17126 insertions, 14376 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.cc b/gcc/config/aarch64/aarch64-builtins.cc
index eb878b9..97bde7c 100644
--- a/gcc/config/aarch64/aarch64-builtins.cc
+++ b/gcc/config/aarch64/aarch64-builtins.cc
@@ -133,6 +133,7 @@
#define MODE_d_f16 E_V4HFmode
#define MODE_d_f32 E_V2SFmode
#define MODE_d_f64 E_V1DFmode
+#define MODE_d_mf8 E_V8QImode
#define MODE_d_s8 E_V8QImode
#define MODE_d_s16 E_V4HImode
#define MODE_d_s32 E_V2SImode
@@ -148,6 +149,7 @@
#define MODE_q_f16 E_V8HFmode
#define MODE_q_f32 E_V4SFmode
#define MODE_q_f64 E_V2DFmode
+#define MODE_q_mf8 E_V16QImode
#define MODE_q_s8 E_V16QImode
#define MODE_q_s16 E_V8HImode
#define MODE_q_s32 E_V4SImode
@@ -177,6 +179,7 @@
#define QUAL_p16 qualifier_poly
#define QUAL_p64 qualifier_poly
#define QUAL_p128 qualifier_poly
+#define QUAL_mf8 qualifier_modal_float
#define LENGTH_d ""
#define LENGTH_q "q"
@@ -458,6 +461,19 @@ aarch64_types_storestruct_lane_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
qualifier_poly, qualifier_struct_load_store_lane_index };
#define TYPES_STORESTRUCT_LANE_P (aarch64_types_storestruct_lane_p_qualifiers)
+constexpr insn_code CODE_FOR_aarch64_sdot_prodv8qi
+ = CODE_FOR_sdot_prodv2siv8qi;
+constexpr insn_code CODE_FOR_aarch64_udot_prodv8qi
+ = CODE_FOR_udot_prodv2siv8qi;
+constexpr insn_code CODE_FOR_aarch64_usdot_prodv8qi
+ = CODE_FOR_usdot_prodv2siv8qi;
+constexpr insn_code CODE_FOR_aarch64_sdot_prodv16qi
+ = CODE_FOR_sdot_prodv4siv16qi;
+constexpr insn_code CODE_FOR_aarch64_udot_prodv16qi
+ = CODE_FOR_udot_prodv4siv16qi;
+constexpr insn_code CODE_FOR_aarch64_usdot_prodv16qi
+ = CODE_FOR_usdot_prodv4siv16qi;
+
#define CF0(N, X) CODE_FOR_aarch64_##N##X
#define CF1(N, X) CODE_FOR_##N##X##1
#define CF2(N, X) CODE_FOR_##N##X##2
@@ -585,6 +601,7 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
/* vreinterpret intrinsics are defined for any pair of element types.
{ _bf16 } { _bf16 }
{ _f16 _f32 _f64 } { _f16 _f32 _f64 }
+ { _mf8 } { _mf8 }
{ _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 }
{ _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }
{ _p8 _p16 _p64 } { _p8 _p16 _p64 }. */
@@ -596,6 +613,7 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
VREINTERPRET_BUILTIN2 (A, f16) \
VREINTERPRET_BUILTIN2 (A, f32) \
VREINTERPRET_BUILTIN2 (A, f64) \
+ VREINTERPRET_BUILTIN2 (A, mf8) \
VREINTERPRET_BUILTIN2 (A, s8) \
VREINTERPRET_BUILTIN2 (A, s16) \
VREINTERPRET_BUILTIN2 (A, s32) \
@@ -613,6 +631,7 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
VREINTERPRET_BUILTINS1 (f16) \
VREINTERPRET_BUILTINS1 (f32) \
VREINTERPRET_BUILTINS1 (f64) \
+ VREINTERPRET_BUILTINS1 (mf8) \
VREINTERPRET_BUILTINS1 (s8) \
VREINTERPRET_BUILTINS1 (s16) \
VREINTERPRET_BUILTINS1 (s32) \
@@ -628,6 +647,7 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
/* vreinterpretq intrinsics are additionally defined for p128.
{ _bf16 } { _bf16 }
{ _f16 _f32 _f64 } { _f16 _f32 _f64 }
+ { _mf8 } { _mf8 }
{ _s8 _s16 _s32 _s64 } x { _s8 _s16 _s32 _s64 }
{ _u8 _u16 _u32 _u64 } { _u8 _u16 _u32 _u64 }
{ _p8 _p16 _p64 _p128 } { _p8 _p16 _p64 _p128 }. */
@@ -639,6 +659,7 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
VREINTERPRETQ_BUILTIN2 (A, f16) \
VREINTERPRETQ_BUILTIN2 (A, f32) \
VREINTERPRETQ_BUILTIN2 (A, f64) \
+ VREINTERPRETQ_BUILTIN2 (A, mf8) \
VREINTERPRETQ_BUILTIN2 (A, s8) \
VREINTERPRETQ_BUILTIN2 (A, s16) \
VREINTERPRETQ_BUILTIN2 (A, s32) \
@@ -657,6 +678,7 @@ static aarch64_simd_builtin_datum aarch64_simd_builtin_data[] = {
VREINTERPRETQ_BUILTINS1 (f16) \
VREINTERPRETQ_BUILTINS1 (f32) \
VREINTERPRETQ_BUILTINS1 (f64) \
+ VREINTERPRETQ_BUILTINS1 (mf8) \
VREINTERPRETQ_BUILTINS1 (s8) \
VREINTERPRETQ_BUILTINS1 (s16) \
VREINTERPRETQ_BUILTINS1 (s32) \
@@ -757,6 +779,10 @@ typedef struct
#define VAR1(T, N, MAP, FLAG, A) \
AARCH64_SIMD_BUILTIN_##T##_##N##A,
+#undef ENTRY
+#define ENTRY(N, S, M, U) \
+ AARCH64_##N,
+
enum aarch64_builtins
{
AARCH64_BUILTIN_MIN,
@@ -829,6 +855,10 @@ enum aarch64_builtins
AARCH64_RBIT,
AARCH64_RBITL,
AARCH64_RBITLL,
+ /* Pragma builtins. */
+ AARCH64_PRAGMA_BUILTIN_START,
+#include "aarch64-simd-pragma-builtins.def"
+ AARCH64_PRAGMA_BUILTIN_END,
/* System register builtins. */
AARCH64_RSR,
AARCH64_RSRP,
@@ -947,6 +977,7 @@ const char *aarch64_scalar_builtin_types[] = {
extern GTY(()) aarch64_simd_type_info aarch64_simd_types[];
+#undef ENTRY
#define ENTRY(E, M, Q, G) \
{E, "__" #E, #G "__" #E, NULL_TREE, NULL_TREE, E_##M##mode, qualifier_##Q},
struct aarch64_simd_type_info aarch64_simd_types [] = {
@@ -961,6 +992,11 @@ static GTY(()) tree aarch64_simd_intOI_type_node = NULL_TREE;
static GTY(()) tree aarch64_simd_intCI_type_node = NULL_TREE;
static GTY(()) tree aarch64_simd_intXI_type_node = NULL_TREE;
+/* The user-visible __mfp8 type, and a pointer to that type. Used
+ across the back-end. */
+tree aarch64_mfp8_type_node = NULL_TREE;
+tree aarch64_mfp8_ptr_type_node = NULL_TREE;
+
/* The user-visible __fp16 type, and a pointer to that type. Used
across the back-end. */
tree aarch64_fp16_type_node = NULL_TREE;
@@ -1082,7 +1118,8 @@ aarch64_lookup_simd_type_in_table (machine_mode mode,
{
int i;
int nelts = ARRAY_SIZE (aarch64_simd_types);
- int q = qualifiers & (qualifier_poly | qualifier_unsigned);
+ int q = qualifiers
+ & (qualifier_poly | qualifier_unsigned | qualifier_modal_float);
for (i = 0; i < nelts; i++)
{
@@ -1126,7 +1163,7 @@ aarch64_simd_builtin_type (machine_mode mode,
return type;
}
-
+
static void
aarch64_init_simd_builtin_types (void)
{
@@ -1185,6 +1222,10 @@ aarch64_init_simd_builtin_types (void)
aarch64_simd_types[Bfloat16x4_t].eltype = bfloat16_type_node;
aarch64_simd_types[Bfloat16x8_t].eltype = bfloat16_type_node;
+ /* Init FP8 element types. */
+ aarch64_simd_types[Mfloat8x8_t].eltype = aarch64_mfp8_type_node;
+ aarch64_simd_types[Mfloat8x16_t].eltype = aarch64_mfp8_type_node;
+
for (i = 0; i < nelts; i++)
{
tree eltype = aarch64_simd_types[i].eltype;
@@ -1547,6 +1588,71 @@ aarch64_init_simd_builtin_functions (bool called_from_pragma)
}
}
+enum class aarch64_builtin_signatures
+{
+ binary,
+};
+
+#undef ENTRY
+#define ENTRY(N, S, M, U) \
+ {#N, aarch64_builtin_signatures::S, E_##M##mode, U, \
+ aarch64_required_extensions::REQUIRED_EXTENSIONS},
+
+/* Initialize pragma builtins. */
+
+struct aarch64_pragma_builtins_data
+{
+ const char *name;
+ aarch64_builtin_signatures signature;
+ machine_mode mode;
+ int unspec;
+ aarch64_required_extensions required_extensions;
+};
+
+static aarch64_pragma_builtins_data aarch64_pragma_builtins[] = {
+#include "aarch64-simd-pragma-builtins.def"
+};
+
+static tree
+aarch64_fntype (const aarch64_pragma_builtins_data &builtin_data)
+{
+ auto type = aarch64_simd_builtin_type (builtin_data.mode, qualifier_none);
+ switch (builtin_data.signature)
+ {
+ case aarch64_builtin_signatures::binary:
+ return build_function_type_list (type, type, type, NULL_TREE);
+ default:
+ gcc_unreachable ();
+ }
+}
+
+static void
+aarch64_init_pragma_builtins ()
+{
+ for (size_t i = 0; i < ARRAY_SIZE (aarch64_pragma_builtins); ++i)
+ {
+ auto data = aarch64_pragma_builtins[i];
+ auto fntype = aarch64_fntype (data);
+ auto code = AARCH64_PRAGMA_BUILTIN_START + i + 1;
+ aarch64_builtin_decls[code]
+ = aarch64_general_simulate_builtin (data.name, fntype, code);
+ }
+}
+
+/* If the builtin function with code CODE has an entry in
+ aarch64_pragma_builtins, return its entry, otherwise return null. */
+
+static const aarch64_pragma_builtins_data*
+aarch64_get_pragma_builtin (int code)
+{
+ if (!(code > AARCH64_PRAGMA_BUILTIN_START
+ && code < AARCH64_PRAGMA_BUILTIN_END))
+ return NULL;
+
+ auto idx = code - (AARCH64_PRAGMA_BUILTIN_START + 1);
+ return &aarch64_pragma_builtins[idx];
+}
+
/* Register the tuple type that contains NUM_VECTORS of the AdvSIMD type
indexed by TYPE_INDEX. */
static void
@@ -1640,6 +1746,7 @@ handle_arm_neon_h (void)
aarch64_init_simd_builtin_functions (true);
aarch64_init_simd_intrinsics ();
+ aarch64_init_pragma_builtins ();
}
static void
@@ -1721,6 +1828,19 @@ aarch64_init_builtin_rsqrt (void)
}
}
+/* Initialize the backend type that supports the user-visible __mfp8
+ type and its relative pointer type. */
+
+static void
+aarch64_init_fp8_types (void)
+{
+ aarch64_mfp8_type_node = make_unsigned_type (8);
+ SET_TYPE_MODE (aarch64_mfp8_type_node, QImode);
+
+ lang_hooks.types.register_builtin_type (aarch64_mfp8_type_node, "__mfp8");
+ aarch64_mfp8_ptr_type_node = build_pointer_type (aarch64_mfp8_type_node);
+}
+
/* Initialize the backend types that support the user-visible __fp16
type, also initialize a pointer to that type, to be used when
forming HFAs. */
@@ -2125,6 +2245,8 @@ aarch64_general_init_builtins (void)
{
aarch64_init_fpsr_fpcr_builtins ();
+ aarch64_init_fp8_types ();
+
aarch64_init_fp16_types ();
aarch64_init_bf16_types ();
@@ -2212,18 +2334,40 @@ aarch64_report_missing_registers (location_t location, tree fndecl)
reported_missing_registers_p = true;
}
-/* Check whether all the AARCH64_FL_* values in REQUIRED_EXTENSIONS are
- enabled, given that those extensions are required for function FNDECL.
- Report an error against LOCATION if not. */
+/* Check whether the requirements in REQUIRED_EXTENSIONS are met, given that
+ those requirements come from calling function FNDECL. Report an error
+ against LOCATION if not. */
bool
aarch64_check_required_extensions (location_t location, tree fndecl,
- aarch64_feature_flags required_extensions)
+ aarch64_required_extensions
+ required_extensions)
{
- if ((required_extensions & ~aarch64_isa_flags) == 0)
- return true;
+ aarch64_feature_flags sm_state_extensions = 0;
+ if (!TARGET_STREAMING)
+ {
+ if (required_extensions.sm_off == 0)
+ {
+ error_at (location, "ACLE function %qD can only be called when"
+ " SME streaming mode is enabled", fndecl);
+ return false;
+ }
+ sm_state_extensions |= required_extensions.sm_off & ~AARCH64_FL_SM_OFF;
+ }
+ if (!TARGET_NON_STREAMING)
+ {
+ if (required_extensions.sm_on == 0)
+ {
+ error_at (location, "ACLE function %qD cannot be called when"
+ " SME streaming mode is enabled", fndecl);
+ return false;
+ }
+ sm_state_extensions |= required_extensions.sm_on & ~AARCH64_FL_SM_ON;
+ }
- auto missing_extensions = required_extensions & ~aarch64_asm_isa_flags;
+ if ((sm_state_extensions & ~aarch64_isa_flags) == 0)
+ return true;
+ auto missing_extensions = sm_state_extensions & ~aarch64_asm_isa_flags;
if (missing_extensions == 0)
{
/* All required extensions are enabled in aarch64_asm_isa_flags, so the
@@ -2232,20 +2376,6 @@ aarch64_check_required_extensions (location_t location, tree fndecl,
return false;
}
- if (missing_extensions & AARCH64_FL_SM_OFF)
- {
- error_at (location, "ACLE function %qD cannot be called when"
- " SME streaming mode is enabled", fndecl);
- return false;
- }
-
- if (missing_extensions & AARCH64_FL_SM_ON)
- {
- error_at (location, "ACLE function %qD can only be called when"
- " SME streaming mode is enabled", fndecl);
- return false;
- }
-
if (missing_extensions & AARCH64_FL_ZA_ON)
{
error_at (location, "ACLE function %qD can only be called from"
@@ -2271,12 +2401,47 @@ aarch64_check_required_extensions (location_t location, tree fndecl,
gcc_unreachable ();
}
+/* Return the ISA extensions required by function CODE. */
+static aarch64_required_extensions
+aarch64_general_required_extensions (unsigned int code)
+{
+ using ext = aarch64_required_extensions;
+ switch (code)
+ {
+ case AARCH64_TME_BUILTIN_TSTART:
+ case AARCH64_TME_BUILTIN_TCOMMIT:
+ case AARCH64_TME_BUILTIN_TTEST:
+ case AARCH64_TME_BUILTIN_TCANCEL:
+ return ext::streaming_compatible (AARCH64_FL_TME);
+
+ case AARCH64_LS64_BUILTIN_LD64B:
+ case AARCH64_LS64_BUILTIN_ST64B:
+ case AARCH64_LS64_BUILTIN_ST64BV:
+ case AARCH64_LS64_BUILTIN_ST64BV0:
+ return ext::streaming_compatible (AARCH64_FL_LS64);
+
+ default:
+ if (code >= AARCH64_MEMTAG_BUILTIN_START
+ && code <= AARCH64_MEMTAG_BUILTIN_END)
+ return ext::streaming_compatible (AARCH64_FL_MEMTAG);
+
+ if (auto builtin_data = aarch64_get_pragma_builtin (code))
+ return builtin_data->required_extensions;
+ }
+ return ext::streaming_compatible (0);
+}
+
bool
aarch64_general_check_builtin_call (location_t location, vec<location_t>,
- unsigned int code, tree fndecl,
- unsigned int nargs ATTRIBUTE_UNUSED, tree *args)
+ unsigned int code, tree fndecl,
+ unsigned int nargs ATTRIBUTE_UNUSED,
+ tree *args)
{
tree decl = aarch64_builtin_decls[code];
+ auto required_extensions = aarch64_general_required_extensions (code);
+ if (!aarch64_check_required_extensions (location, decl, required_extensions))
+ return false;
+
switch (code)
{
case AARCH64_RSR:
@@ -2302,30 +2467,8 @@ aarch64_general_check_builtin_call (location_t location, vec<location_t>,
}
break;
}
-
- case AARCH64_TME_BUILTIN_TSTART:
- case AARCH64_TME_BUILTIN_TCOMMIT:
- case AARCH64_TME_BUILTIN_TTEST:
- case AARCH64_TME_BUILTIN_TCANCEL:
- return aarch64_check_required_extensions (location, decl,
- AARCH64_FL_TME);
-
- case AARCH64_LS64_BUILTIN_LD64B:
- case AARCH64_LS64_BUILTIN_ST64B:
- case AARCH64_LS64_BUILTIN_ST64BV:
- case AARCH64_LS64_BUILTIN_ST64BV0:
- return aarch64_check_required_extensions (location, decl,
- AARCH64_FL_LS64);
-
- default:
- break;
}
- if (code >= AARCH64_MEMTAG_BUILTIN_START
- && code <= AARCH64_MEMTAG_BUILTIN_END)
- return aarch64_check_required_extensions (location, decl,
- AARCH64_FL_MEMTAG);
-
return true;
}
@@ -3189,6 +3332,25 @@ aarch64_expand_builtin_data_intrinsic (unsigned int fcode, tree exp, rtx target)
return ops[0].value;
}
+static rtx
+aarch64_expand_pragma_builtin (tree exp, rtx target,
+ const aarch64_pragma_builtins_data *builtin_data)
+{
+ expand_operand ops[3];
+ auto mode = builtin_data->mode;
+ auto op1 = expand_normal (CALL_EXPR_ARG (exp, 0));
+ auto op2 = expand_normal (CALL_EXPR_ARG (exp, 1));
+ create_output_operand (&ops[0], target, mode);
+ create_input_operand (&ops[1], op1, mode);
+ create_input_operand (&ops[2], op2, mode);
+
+ auto unspec = builtin_data->unspec;
+ auto icode = code_for_aarch64 (unspec, mode);
+ expand_insn (icode, 3, ops);
+
+ return target;
+}
+
/* Expand an expression EXP as fpsr or fpcr setter (depending on
UNSPEC) using MODE. */
static void
@@ -3369,6 +3531,9 @@ aarch64_general_expand_builtin (unsigned int fcode, tree exp, rtx target,
&& fcode <= AARCH64_RBITLL)
return aarch64_expand_builtin_data_intrinsic (fcode, exp, target);
+ if (auto builtin_data = aarch64_get_pragma_builtin (fcode))
+ return aarch64_expand_pragma_builtin (exp, target, builtin_data);
+
gcc_unreachable ();
}
@@ -4021,6 +4186,7 @@ aarch64_resolve_overloaded_builtin_general (location_t loc, tree function,
#undef CF3
#undef CF4
#undef CF10
+#undef ENTRY_VHSDF
#undef VAR1
#undef VAR2
#undef VAR3
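
The pragma-builtin machinery added above is driven by a single X-macro list: aarch64-simd-pragma-builtins.def is included twice with different definitions of ENTRY, once to extend the aarch64_builtins enum and once to build the aarch64_pragma_builtins descriptor table, so the two can never drift apart. A minimal self-contained sketch of the same technique (the names MY_ENTRIES, my_builtin_data and lookup are illustrative stand-ins, not part of the patch):

  /* One .def-style list, expanded once into an enum and once into a table.  */
  #define MY_ENTRIES \
    ENTRY (vamax_f32, binary, V2SF, UNSPEC_FAMAX) \
    ENTRY (vamin_f32, binary, V2SF, UNSPEC_FAMIN)

  enum my_builtin_code {
    MY_BUILTIN_START,
  #define ENTRY(N, S, M, U) MY_##N,
    MY_ENTRIES
  #undef ENTRY
    MY_BUILTIN_END
  };

  struct my_builtin_data { const char *name; };

  static const my_builtin_data my_builtins[] = {
  #define ENTRY(N, S, M, U) { #N },
    MY_ENTRIES
  #undef ENTRY
  };

  /* Code -> table entry, mirroring aarch64_get_pragma_builtin: codes strictly
     between the START and END markers index the table after removing the
     offset of the first real entry.  */
  static const my_builtin_data *
  lookup (int code)
  {
    if (code <= MY_BUILTIN_START || code >= MY_BUILTIN_END)
      return nullptr;
    return &my_builtins[code - (MY_BUILTIN_START + 1)];
  }
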
diff --git a/gcc/config/aarch64/aarch64-builtins.h b/gcc/config/aarch64/aarch64-builtins.h
index e326fe6..00db7a7 100644
--- a/gcc/config/aarch64/aarch64-builtins.h
+++ b/gcc/config/aarch64/aarch64-builtins.h
@@ -54,6 +54,8 @@ enum aarch64_type_qualifiers
/* Lane indices selected in quadtuplets. - must be in range, and flipped for
bigendian. */
qualifier_lane_quadtup_index = 0x1000,
+ /* Modal FP types. */
+ qualifier_modal_float = 0x2000,
};
#define ENTRY(E, M, Q, G) E,
diff --git a/gcc/config/aarch64/aarch64-cc-fusion.cc b/gcc/config/aarch64/aarch64-cc-fusion.cc
index 3af8c00..3cae4c4 100644
--- a/gcc/config/aarch64/aarch64-cc-fusion.cc
+++ b/gcc/config/aarch64/aarch64-cc-fusion.cc
@@ -64,6 +64,7 @@
#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
#define INCLUDE_ARRAY
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/aarch64/aarch64-cores.def b/gcc/config/aarch64/aarch64-cores.def
index cc22600..9d4abf2 100644
--- a/gcc/config/aarch64/aarch64-cores.def
+++ b/gcc/config/aarch64/aarch64-cores.def
@@ -132,6 +132,7 @@ AARCH64_CORE("octeontx2f95mm", octeontx2f95mm, cortexa57, V8_2A, (CRYPTO, PROFI
/* Fujitsu ('F') cores. */
AARCH64_CORE("a64fx", a64fx, a64fx, V8_2A, (F16, SVE), a64fx, 0x46, 0x001, -1)
+AARCH64_CORE("fujitsu-monaka", fujitsu_monaka, cortexa57, V9_3A, (F16, FP8, LS64, RNG, CRYPTO, SVE2_AES, SVE2_BITPERM, SVE2_SHA3, SVE2_SM4), fujitsu_monaka, 0x46, 0x003, -1)
/* HiSilicon ('H') cores. */
AARCH64_CORE("tsv110", tsv110, tsv110, V8_2A, (CRYPTO, F16), tsv110, 0x48, 0xd01, -1)
diff --git a/gcc/config/aarch64/aarch64-cost-tables.h b/gcc/config/aarch64/aarch64-cost-tables.h
index 7c79491..5e0c1f7 100644
--- a/gcc/config/aarch64/aarch64-cost-tables.h
+++ b/gcc/config/aarch64/aarch64-cost-tables.h
@@ -346,7 +346,7 @@ const struct cpu_cost_table thunderx2t99_extra_costs =
}
};
-const struct cpu_cost_table thunderx3t110_extra_costs =
+const struct cpu_cost_table thunderx3t110_extra_costs =
{
/* ALU */
{
diff --git a/gcc/config/aarch64/aarch64-early-ra.cc b/gcc/config/aarch64/aarch64-early-ra.cc
index 5f269d0..bbd8468 100644
--- a/gcc/config/aarch64/aarch64-early-ra.cc
+++ b/gcc/config/aarch64/aarch64-early-ra.cc
@@ -40,6 +40,7 @@
#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
+#define INCLUDE_MEMORY
#define INCLUDE_ARRAY
#include "config.h"
#include "system.h"
@@ -3389,6 +3390,12 @@ early_ra::is_dead_insn (rtx_insn *insn)
if (side_effects_p (set))
return false;
+ /* If we can't delete dead exceptions and the insn throws,
+ then the instruction is not dead. */
+ if (!cfun->can_delete_dead_exceptions
+ && !insn_nothrow_p (insn))
+ return false;
+
return true;
}
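
The new bail-out in early_ra::is_dead_insn follows the rule used by other GCC passes for potentially-throwing instructions: an unused result is not enough to delete an insn that may raise an exception unless -fdelete-dead-exceptions is in effect. A hypothetical example of the situation being protected (not taken from the patch):

  /* Compiled with -fnon-call-exceptions, the load below may throw; even
     though TMP is otherwise unused, the load is only treated as dead when
     -fdelete-dead-exceptions (cfun->can_delete_dead_exceptions) is given.  */
  int
  reads_but_ignores (int *p)
  {
    int tmp = *p;
    (void) tmp;
    return 0;
  }
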
diff --git a/gcc/config/aarch64/aarch64-option-extensions.def b/gcc/config/aarch64/aarch64-option-extensions.def
index 6998627..8279f5a 100644
--- a/gcc/config/aarch64/aarch64-option-extensions.def
+++ b/gcc/config/aarch64/aarch64-option-extensions.def
@@ -234,6 +234,8 @@ AARCH64_OPT_EXTENSION("gcs", GCS, (), (), (), "gcs")
AARCH64_OPT_EXTENSION("fp8", FP8, (SIMD), (), (), "fp8")
+AARCH64_OPT_EXTENSION("faminmax", FAMINMAX, (SIMD), (), (), "faminmax")
+
#undef AARCH64_OPT_FMV_EXTENSION
#undef AARCH64_OPT_EXTENSION
#undef AARCH64_FMV_FEATURE
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index d03c1fe..e8588e1 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -627,7 +627,7 @@ struct aarch64_address_info {
};
#define AARCH64_FUSION_PAIR(x, name) \
- AARCH64_FUSE_##name##_index,
+ AARCH64_FUSE_##name##_index,
/* Supported fusion operations. */
enum aarch64_fusion_pairs_index
{
@@ -665,16 +665,6 @@ enum aarch64_extra_tuning_flags
AARCH64_EXTRA_TUNE_ALL = (1u << AARCH64_EXTRA_TUNE_index_END) - 1
};
-/* Enum to distinguish which type of check is to be done in
- aarch64_simd_valid_immediate. This is used as a bitmask where
- AARCH64_CHECK_MOV has both bits set. Thus AARCH64_CHECK_MOV will
- perform all checks. Adding new types would require changes accordingly. */
-enum simd_immediate_check {
- AARCH64_CHECK_ORR = 1 << 0,
- AARCH64_CHECK_BIC = 1 << 1,
- AARCH64_CHECK_MOV = AARCH64_CHECK_ORR | AARCH64_CHECK_BIC
-};
-
extern struct tune_params aarch64_tune_params;
/* The available SVE predicate patterns, known in the ACLE as "svpattern". */
@@ -754,6 +744,91 @@ private:
bool m_old_general_regs_only;
};
+/* Represents the ISA requirements of an intrinsic function, or of some
+ other similar operation. It stores separate feature flags for
+ non-streaming mode and for streaming-mode; both requirements must
+ be met in streaming-compatible mode. */
+struct aarch64_required_extensions
+{
+ /* Return a requirement that includes FLAGS on top of any existing
+ requirements. */
+ inline CONSTEXPR aarch64_required_extensions
+ and_also (aarch64_feature_flags flags)
+ {
+ return { sm_off ? sm_off | flags : 0,
+ sm_on ? sm_on | flags : 0 };
+ }
+
+ /* Require non-streaming mode and the features in FLAGS. */
+ static inline CONSTEXPR aarch64_required_extensions
+ nonstreaming_only (aarch64_feature_flags flags)
+ {
+ return { AARCH64_FL_SM_OFF | flags, 0 };
+ }
+
+ /* Likewise, and also require SVE. */
+ static inline CONSTEXPR aarch64_required_extensions
+ nonstreaming_sve (aarch64_feature_flags flags)
+ {
+ return nonstreaming_only (AARCH64_FL_SVE | flags);
+ }
+
+ /* Allow both streaming and non-streaming mode, requiring the features
+ in FLAGS for both cases. */
+ static inline CONSTEXPR aarch64_required_extensions
+ streaming_compatible (aarch64_feature_flags flags)
+ {
+ return { AARCH64_FL_SM_OFF | flags, AARCH64_FL_SM_ON | flags };
+ }
+
+ /* Likewise, and also require SVE for non-streaming mode. */
+ static inline CONSTEXPR aarch64_required_extensions
+ ssve (aarch64_feature_flags flags)
+ {
+ return streaming_compatible (AARCH64_FL_SVE | flags, flags);
+ }
+
+ /* Allow both streaming and non-streaming mode, requiring the features
+ in SM_OFF for non-streaming mode and the features in SM_ON for
+ streaming mode. */
+ static inline CONSTEXPR aarch64_required_extensions
+ streaming_compatible (aarch64_feature_flags sm_off,
+ aarch64_feature_flags sm_on)
+ {
+ return { AARCH64_FL_SM_OFF | sm_off, AARCH64_FL_SM_ON | sm_on };
+ }
+
+ /* Likewise, and also require SVE for non-streaming mode. */
+ static inline CONSTEXPR aarch64_required_extensions
+ sve_and_sme (aarch64_feature_flags sm_off, aarch64_feature_flags sm_on)
+ {
+ return streaming_compatible (AARCH64_FL_SVE | sm_off, sm_on);
+ }
+
+ /* Require streaming mode and the features in FLAGS. */
+ static inline CONSTEXPR aarch64_required_extensions
+ streaming_only (aarch64_feature_flags flags)
+ {
+ return { 0, AARCH64_FL_SM_ON | flags };
+ }
+
+ /* The ISA requirements in non-streaming mode, or 0 if the operation
+ is only allowed in streaming mode. When this field is nonzero,
+ it always includes AARCH64_FL_SM_OFF. */
+ aarch64_feature_flags sm_off;
+
+ /* The ISA requirements in streaming mode, or 0 if the operation is only
+ allowed in non-streaming mode. When this field is nonzero,
+ it always includes AARCH64_FL_SM_ON.
+
+ This field should not normally include AARCH64_FL_SME, since we
+ would not be in streaming mode if SME wasn't supported. Excluding
+ AARCH64_FL_SME makes it easier to handle streaming-compatible rules
+ since (for example) svadd_x should be available in streaming-compatible
+ functions even without +sme. */
+ aarch64_feature_flags sm_on;
+};
+
void aarch64_post_cfi_startproc (void);
poly_int64 aarch64_initial_elimination_offset (unsigned, unsigned);
int aarch64_get_condition_code (rtx);
@@ -776,6 +851,7 @@ bool aarch64_rnd_imm_p (rtx);
bool aarch64_constant_address_p (rtx);
bool aarch64_emit_approx_div (rtx, rtx, rtx);
bool aarch64_emit_approx_sqrt (rtx, rtx, bool);
+bool aarch64_emit_opt_vec_rotate (rtx, rtx, rtx);
tree aarch64_vector_load_decl (tree);
rtx aarch64_gen_callee_cookie (aarch64_isa_mode, arm_pcs);
void aarch64_expand_call (rtx, rtx, rtx, bool);
@@ -834,8 +910,11 @@ char *aarch64_output_sve_rdvl (rtx);
char *aarch64_output_sve_addvl_addpl (rtx);
char *aarch64_output_sve_vector_inc_dec (const char *, rtx);
char *aarch64_output_scalar_simd_mov_immediate (rtx, scalar_int_mode);
-char *aarch64_output_simd_mov_immediate (rtx, unsigned,
- enum simd_immediate_check w = AARCH64_CHECK_MOV);
+char *aarch64_output_simd_mov_imm (rtx, unsigned);
+char *aarch64_output_simd_orr_imm (rtx, unsigned);
+char *aarch64_output_simd_and_imm (rtx, unsigned);
+char *aarch64_output_simd_xor_imm (rtx, unsigned);
+
char *aarch64_output_sve_mov_immediate (rtx);
char *aarch64_output_sve_ptrues (rtx);
bool aarch64_pad_reg_upward (machine_mode, const_tree, bool);
@@ -849,8 +928,10 @@ bool aarch64_pars_overlap_p (rtx, rtx);
bool aarch64_simd_scalar_immediate_valid_for_move (rtx, scalar_int_mode);
bool aarch64_simd_shift_imm_p (rtx, machine_mode, bool);
bool aarch64_sve_ptrue_svpattern_p (rtx, struct simd_immediate_info *);
-bool aarch64_simd_valid_immediate (rtx, struct simd_immediate_info *,
- enum simd_immediate_check w = AARCH64_CHECK_MOV);
+bool aarch64_simd_valid_and_imm (rtx);
+bool aarch64_simd_valid_mov_imm (rtx);
+bool aarch64_simd_valid_orr_imm (rtx);
+bool aarch64_simd_valid_xor_imm (rtx);
bool aarch64_valid_sysreg_name_p (const char *);
const char *aarch64_retrieve_sysreg (const char *, bool, bool);
rtx aarch64_check_zero_based_sve_index_immediate (rtx);
@@ -922,6 +1003,7 @@ rtx aarch64_expand_sve_dupq (rtx, machine_mode, rtx);
void aarch64_expand_mov_immediate (rtx, rtx);
rtx aarch64_stack_protect_canary_mem (machine_mode, rtx, aarch64_salt_type);
rtx aarch64_ptrue_reg (machine_mode);
+rtx aarch64_ptrue_reg (machine_mode, unsigned int);
rtx aarch64_pfalse_reg (machine_mode);
bool aarch64_sve_same_pred_for_ptest_p (rtx *, rtx *);
void aarch64_emit_sve_pred_move (rtx, rtx, rtx);
@@ -1019,7 +1101,7 @@ void handle_arm_acle_h (void);
void handle_arm_neon_h (void);
bool aarch64_check_required_extensions (location_t, tree,
- aarch64_feature_flags);
+ aarch64_required_extensions);
bool aarch64_general_check_builtin_call (location_t, vec<location_t>,
unsigned int, tree, unsigned int,
tree *);
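
The new aarch64_required_extensions record replaces a single feature-flag mask with a pair of masks, one per streaming-mode state; aarch64_check_required_extensions then applies whichever mask(s) the calling context needs. A stand-alone sketch of the scheme under made-up flag values (simplified: the real check also masks out the SM_OFF/SM_ON marker bits, and streaming-compatible code must satisfy both masks):

  #include <cstdint>

  typedef uint64_t flags;
  const flags FL_SM_OFF = 1 << 0, FL_SM_ON = 1 << 1, FL_TME = 1 << 2;

  struct required_extensions { flags sm_off, sm_on; };

  /* Allowed in both modes; F is needed in both (cf. streaming_compatible).  */
  static required_extensions
  streaming_compatible (flags f)
  { return { FL_SM_OFF | f, FL_SM_ON | f }; }

  /* Only allowed outside streaming mode (cf. nonstreaming_only).  */
  static required_extensions
  nonstreaming_only (flags f)
  { return { FL_SM_OFF | f, 0 }; }

  /* Boiled-down version of the check: a zero mask means the builtin is not
     usable in the current streaming-mode state at all; otherwise every
     required feature bit must be enabled in the current ISA.  */
  static bool
  ok (required_extensions req, bool streaming, flags isa)
  {
    flags need = streaming ? req.sm_on : req.sm_off;
    return need != 0 && (need & ~isa) == 0;
  }
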
diff --git a/gcc/config/aarch64/aarch64-simd-builtin-types.def b/gcc/config/aarch64/aarch64-simd-builtin-types.def
index 6111cd0..83b2da2 100644
--- a/gcc/config/aarch64/aarch64-simd-builtin-types.def
+++ b/gcc/config/aarch64/aarch64-simd-builtin-types.def
@@ -52,3 +52,5 @@
ENTRY (Float64x2_t, V2DF, none, 13)
ENTRY (Bfloat16x4_t, V4BF, none, 14)
ENTRY (Bfloat16x8_t, V8BF, none, 14)
+ ENTRY (Mfloat8x8_t, V8QI, modal_float, 13)
+ ENTRY (Mfloat8x16_t, V16QI, modal_float, 14)
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e65f73d..0814f8b 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -418,9 +418,9 @@
BUILTIN_VSDQ_I_DI (BINOP_UUS, urshl, 0, NONE)
/* Implemented by <sur><dotprod>_prod<dot_mode>. */
- BUILTIN_VB (TERNOP, sdot_prod, 10, NONE)
- BUILTIN_VB (TERNOPU, udot_prod, 10, NONE)
- BUILTIN_VB (TERNOP_SUSS, usdot_prod, 10, NONE)
+ BUILTIN_VB (TERNOP, sdot_prod, 0, NONE)
+ BUILTIN_VB (TERNOPU, udot_prod, 0, NONE)
+ BUILTIN_VB (TERNOP_SUSS, usdot_prod, 0, NONE)
/* Implemented by aarch64_<sur><dotprod>_lane{q}<dot_mode>. */
BUILTIN_VB (QUADOP_LANE, sdot_lane, 0, NONE)
BUILTIN_VB (QUADOPU_LANE, udot_lane, 0, NONE)
diff --git a/gcc/config/aarch64/aarch64-simd-pragma-builtins.def b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
new file mode 100644
index 0000000..d66642e
--- /dev/null
+++ b/gcc/config/aarch64/aarch64-simd-pragma-builtins.def
@@ -0,0 +1,33 @@
+/* AArch64 SIMD pragma builtins
+ Copyright (C) 2024 Free Software Foundation, Inc.
+ Contributed by ARM Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#undef ENTRY_VHSDF
+#define ENTRY_VHSDF(NAME, SIGNATURE, UNSPEC) \
+ ENTRY (NAME##_f16, SIGNATURE, V4HF, UNSPEC) \
+ ENTRY (NAME##q_f16, SIGNATURE, V8HF, UNSPEC) \
+ ENTRY (NAME##_f32, SIGNATURE, V2SF, UNSPEC) \
+ ENTRY (NAME##q_f32, SIGNATURE, V4SF, UNSPEC) \
+ ENTRY (NAME##q_f64, SIGNATURE, V2DF, UNSPEC)
+
+// faminmax
+#define REQUIRED_EXTENSIONS nonstreaming_only (AARCH64_FL_FAMINMAX)
+ENTRY_VHSDF (vamax, binary, UNSPEC_FAMAX)
+ENTRY_VHSDF (vamin, binary, UNSPEC_FAMIN)
+#undef REQUIRED_EXTENSIONS
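
Each ENTRY_VHSDF line above fans out into one ENTRY per floating-point vector mode, so the two lines register ten builtins (vamax_f16, vamaxq_f16, vamax_f32, vamaxq_f32, vamaxq_f64 and the vamin equivalents), all restricted to non-streaming code with +faminmax by the surrounding REQUIRED_EXTENSIONS definition. A hedged usage sketch, assuming a toolchain where these intrinsics are exposed through <arm_neon.h> and the target enables the extension:

  #include <arm_neon.h>

  /* Lane-wise maximum of absolute values; with +faminmax this is expected to
     map onto the single FAMAX instruction matched by the new
     @aarch64_<faminmax_uns_op><mode> pattern.  */
  float32x4_t
  absolute_maximum (float32x4_t a, float32x4_t b)
  {
    return vamaxq_f32 (a, b);
  }
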
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 23c03a9..a91222b 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -160,7 +160,7 @@
[?r, w ; neon_to_gp<q> , * , *] fmov\t%x0, %d1
[?w, r ; f_mcr , * , *] fmov\t%d0, %1
[?r, r ; mov_reg , * , *] mov\t%0, %1
- [w , Dn; neon_move<q> , simd , *] << aarch64_output_simd_mov_immediate (operands[1], 64);
+ [w , Dn; neon_move<q> , simd , *] << aarch64_output_simd_mov_imm (operands[1], 64);
[w , Dz; f_mcr , * , *] fmov\t%d0, xzr
[w , Dx; neon_move , simd , 8] #
}
@@ -189,7 +189,7 @@
[?r , w ; multiple , * , 8] #
[?w , r ; multiple , * , 8] #
[?r , r ; multiple , * , 8] #
- [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_immediate (operands[1], 128);
+ [w , Dn; neon_move<q> , simd, 4] << aarch64_output_simd_mov_imm (operands[1], 128);
[w , Dz; fmov , * , 4] fmov\t%d0, xzr
[w , Dx; neon_move , simd, 8] #
}
@@ -208,7 +208,6 @@
else
{
if (FP_REGNUM_P (REGNO (operands[0]))
- && <MODE>mode == V2DImode
&& aarch64_maybe_generate_simd_constant (operands[0], operands[1],
<MODE>mode))
;
@@ -568,7 +567,7 @@
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
-(define_insn "<sur>dot_prod<vsi2qi><vczle><vczbe>"
+(define_insn "<sur>dot_prod<mode><vsi2qi><vczle><vczbe>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
@@ -582,7 +581,7 @@
;; These instructions map to the __builtins for the Armv8.6-a I8MM usdot
;; (vector) Dot Product operation and the vectorized optab.
-(define_insn "usdot_prod<vsi2qi><vczle><vczbe>"
+(define_insn "usdot_prod<mode><vsi2qi><vczle><vczbe>"
[(set (match_operand:VS 0 "register_operand" "=w")
(plus:VS
(unspec:VS [(match_operand:<VSI2QI> 1 "register_operand" "w")
@@ -1075,7 +1074,8 @@
rtx ones = force_reg (V16QImode, CONST1_RTX (V16QImode));
rtx abd = gen_reg_rtx (V16QImode);
emit_insn (gen_aarch64_<su>abdv16qi (abd, operands[1], operands[2]));
- emit_insn (gen_udot_prodv16qi (operands[0], abd, ones, operands[3]));
+ emit_insn (gen_udot_prodv4siv16qi (operands[0], abd, ones,
+ operands[3]));
DONE;
}
rtx reduc = gen_reg_rtx (V8HImode);
@@ -1121,11 +1121,11 @@
(define_insn "and<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand")
(and:VDQ_I (match_operand:VDQ_I 1 "register_operand")
- (match_operand:VDQ_I 2 "aarch64_reg_or_bic_imm")))]
+ (match_operand:VDQ_I 2 "aarch64_reg_or_and_imm")))]
"TARGET_SIMD"
{@ [ cons: =0 , 1 , 2 ]
[ w , w , w ] and\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
- [ w , 0 , Db ] << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, AARCH64_CHECK_BIC);
+ [ w , 0 , Db ] << aarch64_output_simd_and_imm (operands[2], <bitsize>);
}
[(set_attr "type" "neon_logic<q>")]
)
@@ -1134,24 +1134,25 @@
(define_insn "ior<mode>3<vczle><vczbe>"
[(set (match_operand:VDQ_I 0 "register_operand")
(ior:VDQ_I (match_operand:VDQ_I 1 "register_operand")
- (match_operand:VDQ_I 2 "aarch64_orr_imm_sve_advsimd")))]
+ (match_operand:VDQ_I 2 "aarch64_reg_or_orr_imm")))]
"TARGET_SIMD"
- {@ [ cons: =0 , 1 , 2; attrs: arch ]
- [ w , w , w ; simd ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
- [ w , 0 , vsl; sve ] orr\t%Z0.<Vetype>, %Z0.<Vetype>, #%2
- [ w , 0 , Do ; simd ] \
- << aarch64_output_simd_mov_immediate (operands[2], <bitsize>, \
- AARCH64_CHECK_ORR);
+ {@ [ cons: =0 , 1 , 2 ]
+ [ w , w , w ] orr\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
+ [ w , 0 , Do ] << aarch64_output_simd_orr_imm (operands[2], <bitsize>);
}
[(set_attr "type" "neon_logic<q>")]
)
+;; For EOR (vector, register) and SVE EOR (vector, immediate)
(define_insn "xor<mode>3<vczle><vczbe>"
- [(set (match_operand:VDQ_I 0 "register_operand" "=w")
- (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
- (match_operand:VDQ_I 2 "register_operand" "w")))]
+ [(set (match_operand:VDQ_I 0 "register_operand")
+ (xor:VDQ_I (match_operand:VDQ_I 1 "register_operand")
+ (match_operand:VDQ_I 2 "aarch64_reg_or_xor_imm")))]
"TARGET_SIMD"
- "eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>"
+ {@ [ cons: =0 , 1 , 2 ]
+ [ w , w , w ] eor\t%0.<Vbtype>, %1.<Vbtype>, %2.<Vbtype>
+ [ w , 0 , De ] << aarch64_output_simd_xor_imm (operands[2], <bitsize>);
+ }
[(set_attr "type" "neon_logic<q>")]
)
@@ -1293,6 +1294,38 @@
[(set_attr "type" "neon_shift_acc<q>")]
)
+;; After all the combinations and propagations of ROTATE have been
+;; attempted split any remaining vector rotates into SHL + USRA sequences.
+(define_insn_and_split "*aarch64_simd_rotate_imm<mode>"
+ [(set (match_operand:VDQ_I 0 "register_operand" "=&w")
+ (rotate:VDQ_I (match_operand:VDQ_I 1 "register_operand" "w")
+ (match_operand:VDQ_I 2 "aarch64_simd_lshift_imm")))]
+ "TARGET_SIMD"
+ "#"
+ "&& 1"
+ [(set (match_dup 3)
+ (ashift:VDQ_I (match_dup 1)
+ (match_dup 2)))
+ (set (match_dup 0)
+ (plus:VDQ_I
+ (lshiftrt:VDQ_I
+ (match_dup 1)
+ (match_dup 4))
+ (match_dup 3)))]
+ {
+ if (aarch64_emit_opt_vec_rotate (operands[0], operands[1], operands[2]))
+ DONE;
+
+ operands[3] = reload_completed ? operands[0] : gen_reg_rtx (<MODE>mode);
+ rtx shft_amnt = unwrap_const_vec_duplicate (operands[2]);
+ int bitwidth = GET_MODE_UNIT_SIZE (<MODE>mode) * BITS_PER_UNIT;
+ operands[4]
+ = aarch64_simd_gen_const_vector_dup (<MODE>mode,
+ bitwidth - INTVAL (shft_amnt));
+ }
+ [(set_attr "length" "8")]
+)
+
(define_insn "aarch64_<sra_op>rsra_n<mode>_insn"
[(set (match_operand:VSDQ_I_DI 0 "register_operand" "=w")
(plus:VSDQ_I_DI
@@ -3515,21 +3548,31 @@
)
(define_expand "popcount<mode>2"
- [(set (match_operand:VDQHSD 0 "register_operand")
- (popcount:VDQHSD (match_operand:VDQHSD 1 "register_operand")))]
+ [(set (match_operand:VDQHSD_V1DI 0 "register_operand")
+ (popcount:VDQHSD_V1DI
+ (match_operand:VDQHSD_V1DI 1 "register_operand")))]
"TARGET_SIMD"
{
if (TARGET_SVE)
{
- rtx p = aarch64_ptrue_reg (<VPRED>mode);
+ rtx p = aarch64_ptrue_reg (<VPRED>mode, <bitsize> == 64 ? 8 : 16);
emit_insn (gen_aarch64_pred_popcount<mode> (operands[0],
p,
operands[1]));
DONE;
}
+ if (<MODE>mode == V1DImode)
+ {
+ rtx out = gen_reg_rtx (DImode);
+ emit_insn (gen_popcountdi2 (out, gen_lowpart (DImode, operands[1])));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, out));
+ DONE;
+ }
+
/* Generate a byte popcount. */
machine_mode mode = <bitsize> == 64 ? V8QImode : V16QImode;
+ machine_mode mode2 = <bitsize> == 64 ? V2SImode : V4SImode;
rtx tmp = gen_reg_rtx (mode);
auto icode = optab_handler (popcount_optab, mode);
emit_insn (GEN_FCN (icode) (tmp, gen_lowpart (mode, operands[1])));
@@ -3540,7 +3583,7 @@
/* For V4SI and V2SI, we can generate a UDOT with a 0 accumulator and a
1 multiplicand. For V2DI, another UAADDLP is needed. */
rtx ones = force_reg (mode, CONST1_RTX (mode));
- auto icode = optab_handler (udot_prod_optab, mode);
+ auto icode = convert_optab_handler (udot_prod_optab, mode2, mode);
mode = <bitsize> == 64 ? V2SImode : V4SImode;
rtx dest = mode == <MODE>mode ? operands[0] : gen_reg_rtx (mode);
rtx zeros = force_reg (mode, CONST0_RTX (mode));
@@ -9044,18 +9087,43 @@
[(set_attr "type" "crypto_sha3")]
)
-(define_insn "aarch64_xarqv2di"
+(define_insn "*aarch64_xarqv2di_insn"
[(set (match_operand:V2DI 0 "register_operand" "=w")
- (rotatert:V2DI
+ (rotate:V2DI
(xor:V2DI
(match_operand:V2DI 1 "register_operand" "%w")
(match_operand:V2DI 2 "register_operand" "w"))
- (match_operand:SI 3 "aarch64_simd_shift_imm_di" "Usd")))]
+ (match_operand:V2DI 3 "aarch64_simd_lshift_imm" "Dl")))]
"TARGET_SHA3"
- "xar\\t%0.2d, %1.2d, %2.2d, %3"
+ {
+ operands[3]
+ = GEN_INT (64 - INTVAL (unwrap_const_vec_duplicate (operands[3])));
+ return "xar\\t%0.2d, %1.2d, %2.2d, %3";
+ }
[(set_attr "type" "crypto_sha3")]
)
+;; The semantics of the vxarq_u64 intrinsics treat the immediate argument as a
+;; right-rotate amount but the recommended representation of rotates by a
+;; constant in RTL is with the left ROTATE code. Translate between the
+;; intrinsic-provided amount and the RTL operands in the expander here.
+;; The define_insn for XAR will translate back to instruction semantics in its
+;; output logic.
+(define_expand "aarch64_xarqv2di"
+ [(set (match_operand:V2DI 0 "register_operand")
+ (rotate:V2DI
+ (xor:V2DI
+ (match_operand:V2DI 1 "register_operand")
+ (match_operand:V2DI 2 "register_operand"))
+ (match_operand:SI 3 "aarch64_simd_shift_imm_di")))]
+ "TARGET_SHA3"
+ {
+ operands[3]
+ = aarch64_simd_gen_const_vector_dup (V2DImode,
+ 64 - INTVAL (operands[3]));
+ }
+)
+
(define_insn "bcaxq<mode>4"
[(set (match_operand:VQ_I 0 "register_operand" "=w")
(xor:VQ_I
@@ -9910,3 +9978,22 @@
"shl\\t%d0, %d1, #16"
[(set_attr "type" "neon_shift_imm")]
)
+
+;; faminmax
+(define_insn "@aarch64_<faminmax_uns_op><mode>"
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (unspec:VHSDF [(match_operand:VHSDF 1 "register_operand" "w")
+ (match_operand:VHSDF 2 "register_operand" "w")]
+ FAMINMAX_UNS))]
+ "TARGET_FAMINMAX"
+ "<faminmax_uns_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+)
+
+(define_insn "*aarch64_faminmax_fused"
+ [(set (match_operand:VHSDF 0 "register_operand" "=w")
+ (FMAXMIN:VHSDF
+ (abs:VHSDF (match_operand:VHSDF 1 "register_operand" "w"))
+ (abs:VHSDF (match_operand:VHSDF 2 "register_operand" "w"))))]
+ "TARGET_FAMINMAX"
+ "<faminmax_op>\t%0.<Vtype>, %1.<Vtype>, %2.<Vtype>"
+)
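
The rotate-splitting pattern above relies on a small identity: a rotate left by N is a shift left by N combined with a logical shift right by width-N, and because the two partial results have no bits in common the combining OR can be written as an addition, which lets the second half use USRA (shift right and accumulate). A scalar model of the same transformation (illustration only):

  #include <stdint.h>

  uint8_t
  rotl8 (uint8_t x, unsigned int n)   /* assumes 0 < n < 8 */
  {
    uint8_t hi = (uint8_t) (x << n);        /* SHL part */
    uint8_t lo = (uint8_t) (x >> (8 - n));  /* folded into USRA */
    return hi + lo;                         /* same as hi | lo here: no overlap */
  }

The same left-ROTATE canonicalisation explains the reworked XAR support: the vxarq_u64 intrinsic still supplies a right-rotate amount, so the new expander converts it to 64 minus that amount, and the insn converts back when printing the instruction.
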
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.cc b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
index d55bee0..1c9f515 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.cc
@@ -755,28 +755,62 @@ public:
gimple *
fold (gimple_folder &f) const override
{
- tree divisor = gimple_call_arg (f.call, 2);
- tree divisor_cst = uniform_integer_cst_p (divisor);
+ if (auto *res = f.fold_const_binary (TRUNC_DIV_EXPR))
+ return res;
- if (!divisor_cst || !integer_pow2p (divisor_cst))
+ /* If the divisor is all ones, fold to dividend. */
+ tree op1 = gimple_call_arg (f.call, 1);
+ tree op2 = gimple_call_arg (f.call, 2);
+ if (integer_onep (op2))
+ return f.fold_active_lanes_to (op1);
+
+ /* If one of the operands is all zeros, fold to zero vector. */
+ if (integer_zerop (op1) || integer_zerop (op2))
+ return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
+
+ /* If the divisor is all integer -1, fold to svneg. */
+ tree pg = gimple_call_arg (f.call, 0);
+ if (!f.type_suffix (0).unsigned_p && integer_minus_onep (op2))
+ {
+ function_instance instance ("svneg", functions::svneg,
+ shapes::unary, MODE_none,
+ f.type_suffix_ids, GROUP_none, f.pred);
+ gcall *call = f.redirect_call (instance);
+ unsigned offset_index = 0;
+ if (f.pred == PRED_m)
+ {
+ offset_index = 1;
+ gimple_call_set_arg (call, 0, op1);
+ }
+ else
+ gimple_set_num_ops (call, 5);
+ gimple_call_set_arg (call, offset_index, pg);
+ gimple_call_set_arg (call, offset_index + 1, op1);
+ return call;
+ }
+
+ /* If the divisor is a uniform power of 2, fold to a shift
+ instruction. */
+ tree op2_cst = uniform_integer_cst_p (op2);
+ if (!op2_cst || !integer_pow2p (op2_cst))
return NULL;
tree new_divisor;
gcall *call;
- if (f.type_suffix (0).unsigned_p && tree_to_uhwi (divisor_cst) != 1)
+ if (f.type_suffix (0).unsigned_p && tree_to_uhwi (op2_cst) != 1)
{
function_instance instance ("svlsr", functions::svlsr,
shapes::binary_uint_opt_n, MODE_n,
f.type_suffix_ids, GROUP_none, f.pred);
call = f.redirect_call (instance);
- tree d = INTEGRAL_TYPE_P (TREE_TYPE (divisor)) ? divisor : divisor_cst;
+ tree d = INTEGRAL_TYPE_P (TREE_TYPE (op2)) ? op2 : op2_cst;
new_divisor = wide_int_to_tree (TREE_TYPE (d), tree_log2 (d));
}
else
{
- if (tree_int_cst_sign_bit (divisor_cst)
- || tree_to_shwi (divisor_cst) == 1)
+ if (tree_int_cst_sign_bit (op2_cst)
+ || tree_to_shwi (op2_cst) == 1)
return NULL;
function_instance instance ("svasrd", functions::svasrd,
@@ -784,7 +818,7 @@ public:
f.type_suffix_ids, GROUP_none, f.pred);
call = f.redirect_call (instance);
new_divisor = wide_int_to_tree (scalar_types[VECTOR_TYPE_svuint64_t],
- tree_log2 (divisor_cst));
+ tree_log2 (op2_cst));
}
gimple_call_set_arg (call, 2, new_divisor);
@@ -804,15 +838,16 @@ public:
e.rotate_inputs_left (0, 3);
insn_code icode;
if (e.type_suffix_ids[1] == NUM_TYPE_SUFFIXES)
- icode = e.direct_optab_handler_for_sign (sdot_prod_optab,
- udot_prod_optab,
- 0, GET_MODE (e.args[0]));
+ icode = e.convert_optab_handler_for_sign (sdot_prod_optab,
+ udot_prod_optab,
+ 0, e.result_mode (),
+ GET_MODE (e.args[0]));
else
icode = (e.type_suffix (0).float_p
? CODE_FOR_aarch64_sve_fdotvnx4sfvnx8hf
: e.type_suffix (0).unsigned_p
- ? CODE_FOR_aarch64_sve_udotvnx4sivnx8hi
- : CODE_FOR_aarch64_sve_sdotvnx4sivnx8hi);
+ ? CODE_FOR_udot_prodvnx4sivnx8hi
+ : CODE_FOR_sdot_prodvnx4sivnx8hi);
return e.use_unpred_insn (icode);
}
};
@@ -1288,6 +1323,20 @@ public:
class svindex_impl : public function_base
{
public:
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ /* Apply constant folding if base and step are integer constants. */
+ tree vec_type = TREE_TYPE (f.lhs);
+ tree base = gimple_call_arg (f.call, 0);
+ tree step = gimple_call_arg (f.call, 1);
+ if (TREE_CODE (base) != INTEGER_CST || TREE_CODE (step) != INTEGER_CST)
+ return NULL;
+ return gimple_build_assign (f.lhs,
+ build_vec_series (vec_type, base, step));
+ }
+
+public:
rtx
expand (function_expander &e) const override
{
@@ -1877,6 +1926,19 @@ public:
}
};
+class svlsl_impl : public rtx_code_function
+{
+public:
+ CONSTEXPR svlsl_impl ()
+ : rtx_code_function (ASHIFT, ASHIFT) {}
+
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ return f.fold_const_binary (LSHIFT_EXPR);
+ }
+};
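
With fold_const_binary wired up (defined later in aarch64-sve-builtins.cc), a fully constant shift can now be evaluated at compile time; an illustrative sketch:

#include <arm_sve.h>

// Every lane of the argument is 5 and the shift amount is 2, so the whole
// call can be folded to a constant vector of 20s.
svint32_t shift_constants (svbool_t pg)
{
  return svlsl_n_s32_x (pg, svdup_s32 (5), 2);
}
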
+
class svmad_impl : public function_base
{
public:
@@ -1995,6 +2057,93 @@ public:
}
};
+class svmul_impl : public rtx_code_function
+{
+public:
+ CONSTEXPR svmul_impl ()
+ : rtx_code_function (MULT, MULT, UNSPEC_COND_FMUL) {}
+
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ if (auto *res = f.fold_const_binary (MULT_EXPR))
+ return res;
+
+    /* If one of the operands is all ones, fold to the other operand.  */
+ tree op1 = gimple_call_arg (f.call, 1);
+ tree op2 = gimple_call_arg (f.call, 2);
+ if (integer_onep (op1))
+ return f.fold_active_lanes_to (op2);
+ if (integer_onep (op2))
+ return f.fold_active_lanes_to (op1);
+
+ /* If one of the operands is all zeros, fold to zero vector. */
+ if (integer_zerop (op1) || integer_zerop (op2))
+ return f.fold_active_lanes_to (build_zero_cst (TREE_TYPE (f.lhs)));
+
+ /* If one of the operands is all integer -1, fold to svneg. */
+ tree pg = gimple_call_arg (f.call, 0);
+ tree negated_op = NULL;
+ if (integer_minus_onep (op2))
+ negated_op = op1;
+ else if (integer_minus_onep (op1))
+ negated_op = op2;
+ if (!f.type_suffix (0).unsigned_p && negated_op)
+ {
+ function_instance instance ("svneg", functions::svneg,
+ shapes::unary, MODE_none,
+ f.type_suffix_ids, GROUP_none, f.pred);
+ gcall *call = f.redirect_call (instance);
+ unsigned offset_index = 0;
+ if (f.pred == PRED_m)
+ {
+ offset_index = 1;
+ gimple_call_set_arg (call, 0, op1);
+ }
+ else
+ gimple_set_num_ops (call, 5);
+ gimple_call_set_arg (call, offset_index, pg);
+ gimple_call_set_arg (call, offset_index + 1, negated_op);
+ return call;
+ }
+
+ /* If one of the operands is a uniform power of 2, fold to a left shift
+ by immediate. */
+ tree op1_cst = uniform_integer_cst_p (op1);
+ tree op2_cst = uniform_integer_cst_p (op2);
+ tree shift_op1, shift_op2 = NULL;
+ if (op1_cst && integer_pow2p (op1_cst)
+ && (f.pred != PRED_m
+ || is_ptrue (pg, f.type_suffix (0).element_bytes)))
+ {
+ shift_op1 = op2;
+ shift_op2 = op1_cst;
+ }
+ else if (op2_cst && integer_pow2p (op2_cst))
+ {
+ shift_op1 = op1;
+ shift_op2 = op2_cst;
+ }
+ else
+ return NULL;
+
+ if (shift_op2)
+ {
+ shift_op2 = wide_int_to_tree (unsigned_type_for (TREE_TYPE (shift_op2)),
+ tree_log2 (shift_op2));
+ function_instance instance ("svlsl", functions::svlsl,
+ shapes::binary_uint_opt_n, MODE_n,
+ f.type_suffix_ids, GROUP_none, f.pred);
+ gcall *call = f.redirect_call (instance);
+ gimple_call_set_arg (call, 1, shift_op1);
+ gimple_call_set_arg (call, 2, shift_op2);
+ return call;
+ }
+
+ return NULL;
+ }
+};
+
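
The svmul folds mirror the svdiv ones; roughly, at the source level (arm_sve.h spellings, illustrative helper names):

#include <arm_sve.h>

// svmul_n_s32_x (pg, x, -1) becomes a predicated negation.
svint32_t mul_by_minus_one (svbool_t pg, svint32_t x)
{
  return svneg_s32_x (pg, x);
}

// svmul_n_u32_x (pg, x, 8) becomes a left shift by log2 (8) = 3.
svuint32_t mul_by_eight (svbool_t pg, svuint32_t x)
{
  return svlsl_n_u32_x (pg, x, 3);
}
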
class svnand_impl : public function_base
{
public:
@@ -2861,7 +3010,7 @@ public:
Hence we do the same rotation on arguments as svdot_impl does. */
e.rotate_inputs_left (0, 3);
machine_mode mode = e.vector_mode (0);
- insn_code icode = code_for_dot_prod (UNSPEC_USDOT, mode);
+ insn_code icode = code_for_dot_prod (UNSPEC_USDOT, e.result_mode (), mode);
return e.use_exact_insn (icode);
}
@@ -2900,7 +3049,9 @@ public:
: while_comparison (unspec_for_sint, unspec_for_uint), m_eq_p (eq_p)
{}
- /* Try to fold a call by treating its arguments as constants of type T. */
+ /* Try to fold a call by treating its arguments as constants of type T.
+ We have already filtered out the degenerate cases of X .LT. MIN
+ and X .LE. MAX. */
template<typename T>
gimple *
fold_type (gimple_folder &f) const
@@ -2956,6 +3107,13 @@ public:
if (f.vectors_per_tuple () > 1)
return nullptr;
+ /* Filter out cases where the condition is always true or always false. */
+ tree arg1 = gimple_call_arg (f.call, 1);
+ if (!m_eq_p && operand_equal_p (arg1, TYPE_MIN_VALUE (TREE_TYPE (arg1))))
+ return f.fold_to_pfalse ();
+ if (m_eq_p && operand_equal_p (arg1, TYPE_MAX_VALUE (TREE_TYPE (arg1))))
+ return f.fold_to_ptrue ();
+
if (f.type_suffix (1).unsigned_p)
return fold_type<poly_uint64> (f);
else
@@ -3155,7 +3313,7 @@ FUNCTION (svldnf1uh, svldxf1_extend_impl, (TYPE_SUFFIX_u16, UNSPEC_LDNF1))
FUNCTION (svldnf1uw, svldxf1_extend_impl, (TYPE_SUFFIX_u32, UNSPEC_LDNF1))
FUNCTION (svldnt1, svldnt1_impl,)
FUNCTION (svlen, svlen_impl,)
-FUNCTION (svlsl, rtx_code_function, (ASHIFT, ASHIFT))
+FUNCTION (svlsl, svlsl_impl,)
FUNCTION (svlsl_wide, shift_wide, (ASHIFT, UNSPEC_ASHIFT_WIDE))
FUNCTION (svlsr, rtx_code_function, (LSHIFTRT, LSHIFTRT))
FUNCTION (svlsr_wide, shift_wide, (LSHIFTRT, UNSPEC_LSHIFTRT_WIDE))
@@ -3179,7 +3337,7 @@ FUNCTION (svmls_lane, svmls_lane_impl,)
FUNCTION (svmmla, svmmla_impl,)
FUNCTION (svmov, svmov_impl,)
FUNCTION (svmsb, svmsb_impl,)
-FUNCTION (svmul, rtx_code_function, (MULT, MULT, UNSPEC_COND_FMUL))
+FUNCTION (svmul, svmul_impl,)
FUNCTION (svmul_lane, CODE_FOR_MODE0 (aarch64_mul_lane),)
FUNCTION (svmulh, unspec_based_function, (UNSPEC_SMUL_HIGHPART,
UNSPEC_UMUL_HIGHPART, -1))
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.def b/gcc/config/aarch64/aarch64-sve-builtins-base.def
index 65fcba9..da2a0e4 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.def
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE
+#define REQUIRED_EXTENSIONS ssve (0)
DEF_SVE_FUNCTION (svabd, binary_opt_n, all_arith, mxz)
DEF_SVE_FUNCTION (svabs, unary, all_float_and_signed, mxz)
DEF_SVE_FUNCTION (svacge, compare_opt_n, all_float, implicit)
@@ -261,7 +261,7 @@ DEF_SVE_FUNCTION (svzip2, binary, all_data, none)
DEF_SVE_FUNCTION (svzip2, binary_pred, all_pred, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_SM_OFF
+#define REQUIRED_EXTENSIONS nonstreaming_sve (0)
DEF_SVE_FUNCTION (svadda, fold_left, all_float, implicit)
DEF_SVE_FUNCTION (svadrb, adr_offset, none, none)
DEF_SVE_FUNCTION (svadrd, adr_index, none, none)
@@ -327,7 +327,7 @@ DEF_SVE_FUNCTION (svtssel, binary_uint, all_float, none)
DEF_SVE_FUNCTION (svwrffr, setffr, none, implicit)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_BF16
+#define REQUIRED_EXTENSIONS ssve (AARCH64_FL_BF16)
DEF_SVE_FUNCTION (svbfdot, ternary_bfloat_opt_n, s_float, none)
DEF_SVE_FUNCTION (svbfdot_lane, ternary_bfloat_lanex2, s_float, none)
DEF_SVE_FUNCTION (svbfmlalb, ternary_bfloat_opt_n, s_float, none)
@@ -338,33 +338,29 @@ DEF_SVE_FUNCTION (svcvt, unary_convertxn, cvt_bfloat, mxz)
DEF_SVE_FUNCTION (svcvtnt, unary_convert_narrowt, cvt_bfloat, mx)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
- | AARCH64_FL_BF16 \
- | AARCH64_FL_SM_OFF)
+#define REQUIRED_EXTENSIONS nonstreaming_sve (AARCH64_FL_BF16)
DEF_SVE_FUNCTION (svbfmmla, ternary_bfloat, s_float, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_I8MM
+#define REQUIRED_EXTENSIONS ssve (AARCH64_FL_I8MM)
DEF_SVE_FUNCTION (svsudot, ternary_intq_uintq_opt_n, s_signed, none)
DEF_SVE_FUNCTION (svsudot_lane, ternary_intq_uintq_lane, s_signed, none)
DEF_SVE_FUNCTION (svusdot, ternary_uintq_intq_opt_n, s_signed, none)
DEF_SVE_FUNCTION (svusdot_lane, ternary_uintq_intq_lane, s_signed, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
- | AARCH64_FL_I8MM \
- | AARCH64_FL_SM_OFF)
+#define REQUIRED_EXTENSIONS nonstreaming_sve (AARCH64_FL_I8MM)
DEF_SVE_FUNCTION (svmmla, mmla, s_integer, none)
DEF_SVE_FUNCTION (svusmmla, ternary_uintq_intq, s_signed, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
- | AARCH64_FL_F32MM \
- | AARCH64_FL_SM_OFF)
+#define REQUIRED_EXTENSIONS nonstreaming_sve (AARCH64_FL_F32MM)
DEF_SVE_FUNCTION (svmmla, mmla, s_float, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_F64MM
+#define REQUIRED_EXTENSIONS nonstreaming_sve (AARCH64_FL_F64MM)
+DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit)
+DEF_SVE_FUNCTION (svmmla, mmla, d_float, none)
DEF_SVE_FUNCTION (svtrn1q, binary, all_data, none)
DEF_SVE_FUNCTION (svtrn2q, binary, all_data, none)
DEF_SVE_FUNCTION (svuzp1q, binary, all_data, none)
@@ -372,10 +368,3 @@ DEF_SVE_FUNCTION (svuzp2q, binary, all_data, none)
DEF_SVE_FUNCTION (svzip1q, binary, all_data, none)
DEF_SVE_FUNCTION (svzip2q, binary, all_data, none)
#undef REQUIRED_EXTENSIONS
-
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
- | AARCH64_FL_F64MM \
- | AARCH64_FL_SM_OFF)
-DEF_SVE_FUNCTION (svld1ro, load_replicate, all_data, implicit)
-DEF_SVE_FUNCTION (svmmla, mmla, d_float, none)
-#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-base.h b/gcc/config/aarch64/aarch64-sve-builtins-base.h
index 5bbf356..978cf70 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-base.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-base.h
@@ -37,6 +37,8 @@ namespace aarch64_sve
extern const function_base *const svadrd;
extern const function_base *const svadrh;
extern const function_base *const svadrw;
+ extern const function_base *const svamax;
+ extern const function_base *const svamin;
extern const function_base *const svand;
extern const function_base *const svandv;
extern const function_base *const svasr;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sme.def b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
index 416df0b..bc2c332 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sme.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sme.def
@@ -32,12 +32,12 @@
DEF_SME_ZA_FUNCTION_GS (NAME, SHAPE, TYPES, none, PREDS)
#endif
-#define REQUIRED_EXTENSIONS 0
+#define REQUIRED_EXTENSIONS streaming_compatible (0)
DEF_SME_FUNCTION (arm_has_sme, bool_inherent, none, none)
DEF_SME_FUNCTION (arm_in_streaming_mode, bool_inherent, none, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_SME
+#define REQUIRED_EXTENSIONS streaming_compatible (AARCH64_FL_SME)
DEF_SME_FUNCTION (svcntsb, count_inherent, none, none)
DEF_SME_FUNCTION (svcntsd, count_inherent, none, none)
DEF_SME_FUNCTION (svcntsh, count_inherent, none, none)
@@ -49,7 +49,7 @@ DEF_SME_ZA_FUNCTION (svzero, inherent_za, za, none)
DEF_SME_ZA_FUNCTION (svzero_mask, inherent_mask_za, za, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_SME | AARCH64_FL_SM_ON
+#define REQUIRED_EXTENSIONS streaming_only (0)
DEF_SME_ZA_FUNCTION (svaddha, unary_za_m, za_s_integer, za_m)
DEF_SME_ZA_FUNCTION (svaddva, unary_za_m, za_s_integer, za_m)
DEF_SME_ZA_FUNCTION (svld1_hor, load_za, all_za, none)
@@ -70,9 +70,7 @@ DEF_SME_ZA_FUNCTION (svwrite_hor, write_za_m, za_all_data, za_m)
DEF_SME_ZA_FUNCTION (svwrite_ver, write_za_m, za_all_data, za_m)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \
- | AARCH64_FL_SME_I16I64 \
- | AARCH64_FL_SM_ON)
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_I16I64)
DEF_SME_ZA_FUNCTION (svaddha, unary_za_m, za_d_integer, za_m)
DEF_SME_ZA_FUNCTION (svaddva, unary_za_m, za_d_integer, za_m)
DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, mop_i16i64, za_m)
@@ -83,14 +81,12 @@ DEF_SME_ZA_FUNCTION (svusmopa, binary_za_int_m, mop_i16i64_unsigned, za_m)
DEF_SME_ZA_FUNCTION (svusmops, binary_za_int_m, mop_i16i64_unsigned, za_m)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \
- | AARCH64_FL_SME_F64F64 \
- | AARCH64_FL_SM_ON)
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME_F64F64)
DEF_SME_ZA_FUNCTION (svmopa, binary_za_m, za_d_float, za_m)
DEF_SME_ZA_FUNCTION (svmops, binary_za_m, za_d_float, za_m)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS AARCH64_FL_SME | AARCH64_FL_SME2
+#define REQUIRED_EXTENSIONS streaming_compatible (AARCH64_FL_SME2)
DEF_SME_FUNCTION (svldr_zt, ldr_zt, none, none)
DEF_SME_FUNCTION (svstr_zt, str_zt, none, none)
DEF_SME_FUNCTION (svzero_zt, inherent_zt, none, none)
@@ -100,7 +96,7 @@ DEF_SME_FUNCTION (svzero_zt, inherent_zt, none, none)
which will then be resolved to either an integer function or a
floating-point function. They are needed because the integer and
floating-point functions have different architecture requirements. */
-#define REQUIRED_EXTENSIONS AARCH64_FL_SME | AARCH64_FL_SME2 | AARCH64_FL_SM_ON
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2)
DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_s_data, vg1x24, none)
DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, d_za, vg1x24, none)
DEF_SME_ZA_FUNCTION_GS (svadd_write, binary_za_slice_opt_single, za_s_integer,
@@ -172,10 +168,8 @@ DEF_SME_ZA_FUNCTION_GS (svwrite_hor, write_za, za_bhsd_data, vg24, none)
DEF_SME_ZA_FUNCTION_GS (svwrite_ver, write_za, za_bhsd_data, vg24, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \
- | AARCH64_FL_SME2 \
- | AARCH64_FL_SME_I16I64 \
- | AARCH64_FL_SM_ON)
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 \
+ | AARCH64_FL_SME_I16I64)
DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_d_integer, vg1x24, none)
DEF_SME_ZA_FUNCTION_GS (svadd_write, binary_za_slice_opt_single, za_d_integer,
vg1x24, none)
@@ -198,10 +192,8 @@ DEF_SME_ZA_FUNCTION_GS (svvdot_lane, dot_za_slice_lane, za_d_h_integer,
vg1x4, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SME \
- | AARCH64_FL_SME2 \
- | AARCH64_FL_SME_F64F64 \
- | AARCH64_FL_SM_ON)
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2 \
+ | AARCH64_FL_SME_F64F64)
DEF_SME_ZA_FUNCTION_GS (svadd, unary_za_slice, za_d_float, vg1x24, none)
DEF_SME_ZA_FUNCTION_GS (svmla, binary_za_slice_opt_single, za_d_float,
vg1x24, none)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
index 146a545..f0ab740 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.cc
@@ -81,6 +81,24 @@ unspec_sqrdcmlah (int rot)
class svaba_impl : public function_base
{
public:
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ /* Fold to svabd if op1 is all zeros. */
+ tree op1 = gimple_call_arg (f.call, 0);
+ if (!integer_zerop (op1))
+ return NULL;
+ function_instance instance ("svabd", functions::svabd,
+ shapes::binary_opt_n, f.mode_suffix_id,
+ f.type_suffix_ids, GROUP_none, PRED_x);
+ gcall *call = f.redirect_call (instance);
+ /* Add a ptrue as predicate, because unlike svaba, svabd is
+ predicated. */
+ gimple_call_set_arg (call, 0, build_all_ones_cst (f.gp_type ()));
+ return call;
+ }
+
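
In intrinsic terms the fold says that accumulating an absolute difference into zero is just the absolute difference itself; a minimal sketch (arm_sve.h spellings, illustrative helper name):

#include <arm_sve.h>

// svaba_s8 (svdup_s8 (0), x, y) computes 0 + |x - y| lane by lane, which is
// exactly svabd under an all-true predicate.
svint8_t aba_into_zero (svint8_t x, svint8_t y)
{
  return svabd_s8_x (svptrue_b8 (), x, y);
}
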
+public:
rtx
expand (function_expander &e) const override
{
@@ -90,6 +108,22 @@ public:
}
};
+class svxar_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const override
+ {
+ /* aarch64_sve2_xar represents this operation with a left-rotate RTX.
+ Convert the right-rotate amount from the intrinsic to fit this. */
+ machine_mode mode = e.vector_mode (0);
+ HOST_WIDE_INT rot = GET_MODE_UNIT_BITSIZE (mode)
+ - INTVAL (e.args[2]);
+ e.args[2] = aarch64_simd_gen_const_vector_dup (mode, rot);
+ return e.use_exact_insn (code_for_aarch64_sve2_xar (mode));
+ }
+};
+
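
The amount conversion relies on the usual rotate identity: a right-rotate by n equals a left-rotate by (element width - n). A scalar check of that identity (C++20 <bit>):

#include <bit>
#include <cstdint>
#include <cassert>

int main ()
{
  uint32_t x = 0x12345678u;
  for (unsigned n = 1; n < 32; ++n)
    assert (std::rotr (x, n) == std::rotl (x, 32 - n));
  return 0;
}
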
class svcdot_impl : public function_base
{
public:
@@ -234,7 +268,7 @@ public:
}
};
-class svpsel_impl : public function_base
+class svpsel_lane_impl : public function_base
{
public:
rtx
@@ -418,6 +452,34 @@ public:
class svsra_impl : public function_base
{
public:
+ gimple *
+ fold (gimple_folder &f) const override
+ {
+ /* Fold to svlsr/svasr if op1 is all zeros. */
+ tree op1 = gimple_call_arg (f.call, 0);
+ if (!integer_zerop (op1))
+ return NULL;
+ function_instance instance ("svlsr", functions::svlsr,
+ shapes::binary_uint_opt_n, MODE_n,
+ f.type_suffix_ids, GROUP_none, PRED_x);
+ if (!f.type_suffix (0).unsigned_p)
+ {
+ instance.base_name = "svasr";
+ instance.base = functions::svasr;
+ }
+ gcall *call = f.redirect_call (instance);
+ /* Add a ptrue as predicate, because unlike svsra, svlsr/svasr are
+ predicated intrinsics. */
+ gimple_call_set_arg (call, 0, build_all_ones_cst (f.gp_type ()));
+ /* For svsra, the shift amount (imm3) is uint64_t for all function types,
+ but for svlsr/svasr, imm3 has the same width as the function type. */
+ tree imm3 = gimple_call_arg (f.call, 2);
+ tree imm3_prec = wide_int_to_tree (f.scalar_type (0),
+ wi::to_widest (imm3));
+ gimple_call_set_arg (call, 2, imm3_prec);
+ return call;
+ }
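
In intrinsic terms, accumulating a shifted value into a zero vector is just the shift itself; a minimal sketch (arm_sve.h spellings, illustrative helper names):

#include <arm_sve.h>

// svsra_n_u32 (svdup_u32 (0), x, 2) adds (x >> 2) to zero, i.e. it is the
// plain logical shift under an all-true predicate.
svuint32_t sra_into_zero_unsigned (svuint32_t x)
{
  return svlsr_n_u32_x (svptrue_b32 (), x, 2);
}

// The signed variant maps to an arithmetic right shift instead.
svint32_t sra_into_zero_signed (svint32_t x)
{
  return svasr_n_s32_x (svptrue_b32 (), x, 2);
}
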
+public:
rtx
expand (function_expander &e) const override
{
@@ -545,6 +607,10 @@ FUNCTION (svaesd, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesd))
FUNCTION (svaese, fixed_insn_function, (CODE_FOR_aarch64_sve2_aese))
FUNCTION (svaesimc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesimc))
FUNCTION (svaesmc, fixed_insn_function, (CODE_FOR_aarch64_sve2_aesmc))
+FUNCTION (svamax, cond_or_uncond_unspec_function,
+ (UNSPEC_COND_FAMAX, UNSPEC_FAMAX))
+FUNCTION (svamin, cond_or_uncond_unspec_function,
+ (UNSPEC_COND_FAMIN, UNSPEC_FAMIN))
FUNCTION (svbcax, CODE_FOR_MODE0 (aarch64_sve2_bcax),)
FUNCTION (svbdep, unspec_based_function, (UNSPEC_BDEP, UNSPEC_BDEP, -1))
FUNCTION (svbext, unspec_based_function, (UNSPEC_BEXT, UNSPEC_BEXT, -1))
@@ -625,7 +691,7 @@ FUNCTION (svpmullb, unspec_based_function, (-1, UNSPEC_PMULLB, -1))
FUNCTION (svpmullb_pair, unspec_based_function, (-1, UNSPEC_PMULLB_PAIR, -1))
FUNCTION (svpmullt, unspec_based_function, (-1, UNSPEC_PMULLT, -1))
FUNCTION (svpmullt_pair, unspec_based_function, (-1, UNSPEC_PMULLT_PAIR, -1))
-FUNCTION (svpsel, svpsel_impl,)
+FUNCTION (svpsel_lane, svpsel_lane_impl,)
FUNCTION (svqabs, rtx_code_function, (SS_ABS, UNKNOWN, UNKNOWN))
FUNCTION (svqcadd, svqcadd_impl,)
FUNCTION (svqcvt, integer_conversion, (UNSPEC_SQCVT, UNSPEC_SQCVTU,
@@ -745,6 +811,6 @@ FUNCTION (svwhilege, while_comparison, (UNSPEC_WHILEGE, UNSPEC_WHILEHS))
FUNCTION (svwhilegt, while_comparison, (UNSPEC_WHILEGT, UNSPEC_WHILEHI))
FUNCTION (svwhilerw, svwhilerw_svwhilewr_impl, (UNSPEC_WHILERW))
FUNCTION (svwhilewr, svwhilerw_svwhilewr_impl, (UNSPEC_WHILEWR))
-FUNCTION (svxar, CODE_FOR_MODE0 (aarch64_sve2_xar),)
+FUNCTION (svxar, svxar_impl,)
} /* end namespace aarch64_sve */
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
index 4543402..e402155 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.def
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define REQUIRED_EXTENSIONS AARCH64_FL_SVE | AARCH64_FL_SVE2
+#define REQUIRED_EXTENSIONS sve_and_sme (AARCH64_FL_SVE2, 0)
DEF_SVE_FUNCTION (svaba, ternary_opt_n, all_integer, none)
DEF_SVE_FUNCTION (svabalb, ternary_long_opt_n, hsd_integer, none)
DEF_SVE_FUNCTION (svabalt, ternary_long_opt_n, hsd_integer, none)
@@ -166,9 +166,7 @@ DEF_SVE_FUNCTION (svwhilewr, compare_ptr, all_data, none)
DEF_SVE_FUNCTION (svxar, ternary_shift_right_imm, all_integer, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
- | AARCH64_FL_SVE2 \
- | AARCH64_FL_SM_OFF)
+#define REQUIRED_EXTENSIONS nonstreaming_sve (AARCH64_FL_SVE2)
DEF_SVE_FUNCTION (svhistcnt, binary_to_uint, sd_integer, z)
DEF_SVE_FUNCTION (svhistseg, binary_to_uint, b_integer, none)
DEF_SVE_FUNCTION (svldnt1_gather, load_gather_sv_restricted, sd_data, implicit)
@@ -194,10 +192,8 @@ DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_index_restricted, d_integer, i
DEF_SVE_FUNCTION (svstnt1w_scatter, store_scatter_offset_restricted, d_integer, implicit)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
- | AARCH64_FL_SVE2 \
- | AARCH64_FL_SVE2_AES \
- | AARCH64_FL_SM_OFF)
+#define REQUIRED_EXTENSIONS nonstreaming_sve (AARCH64_FL_SVE2 \
+ | AARCH64_FL_SVE2_AES)
DEF_SVE_FUNCTION (svaesd, binary, b_unsigned, none)
DEF_SVE_FUNCTION (svaese, binary, b_unsigned, none)
DEF_SVE_FUNCTION (svaesmc, unary, b_unsigned, none)
@@ -206,44 +202,31 @@ DEF_SVE_FUNCTION (svpmullb_pair, binary_opt_n, d_unsigned, none)
DEF_SVE_FUNCTION (svpmullt_pair, binary_opt_n, d_unsigned, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
- | AARCH64_FL_SVE2 \
- | AARCH64_FL_SVE2_BITPERM \
- | AARCH64_FL_SM_OFF)
+#define REQUIRED_EXTENSIONS nonstreaming_sve (AARCH64_FL_SVE2 \
+ | AARCH64_FL_SVE2_BITPERM)
DEF_SVE_FUNCTION (svbdep, binary_opt_n, all_unsigned, none)
DEF_SVE_FUNCTION (svbext, binary_opt_n, all_unsigned, none)
DEF_SVE_FUNCTION (svbgrp, binary_opt_n, all_unsigned, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
- | AARCH64_FL_SVE2 \
- | AARCH64_FL_SVE2_SHA3 \
- | AARCH64_FL_SM_OFF)
+#define REQUIRED_EXTENSIONS nonstreaming_sve (AARCH64_FL_SVE2 \
+ | AARCH64_FL_SVE2_SHA3)
DEF_SVE_FUNCTION (svrax1, binary, d_integer, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
- | AARCH64_FL_SVE2 \
- | AARCH64_FL_SVE2_SM4 \
- | AARCH64_FL_SM_OFF)
+#define REQUIRED_EXTENSIONS nonstreaming_sve (AARCH64_FL_SVE2 \
+ | AARCH64_FL_SVE2_SM4)
DEF_SVE_FUNCTION (svsm4e, binary, s_unsigned, none)
DEF_SVE_FUNCTION (svsm4ekey, binary, s_unsigned, none)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
- | AARCH64_FL_SVE2 \
- | AARCH64_FL_SME \
- | AARCH64_FL_SM_ON)
+#define REQUIRED_EXTENSIONS streaming_only (0)
DEF_SVE_FUNCTION (svclamp, clamp, all_integer, none)
-DEF_SVE_FUNCTION (svpsel, select_pred, all_pred_count, none)
+DEF_SVE_FUNCTION (svpsel_lane, select_pred, all_pred_count, none)
DEF_SVE_FUNCTION (svrevd, unary, all_data, mxz)
#undef REQUIRED_EXTENSIONS
-#define REQUIRED_EXTENSIONS (AARCH64_FL_SVE \
- | AARCH64_FL_SVE2 \
- | AARCH64_FL_SME \
- | AARCH64_FL_SME2 \
- | AARCH64_FL_SM_ON)
+#define REQUIRED_EXTENSIONS streaming_only (AARCH64_FL_SME2)
DEF_SVE_FUNCTION_GS (svadd, binary_single, all_integer, x24, none)
DEF_SVE_FUNCTION (svbfmlslb, ternary_bfloat_opt_n, s_float, none)
DEF_SVE_FUNCTION (svbfmlslb_lane, ternary_bfloat_lane, s_float, none)
@@ -300,3 +283,10 @@ DEF_SVE_FUNCTION (svwhilelt, compare_scalar_count, while_x_c, none)
DEF_SVE_FUNCTION_GS (svzip, unaryxn, all_data, x24, none)
DEF_SVE_FUNCTION_GS (svzipq, unaryxn, all_data, x24, none)
#undef REQUIRED_EXTENSIONS
+
+#define REQUIRED_EXTENSIONS \
+ sve_and_sme (AARCH64_FL_SVE2 | AARCH64_FL_FAMINMAX, \
+ AARCH64_FL_SME2 | AARCH64_FL_FAMINMAX)
+DEF_SVE_FUNCTION (svamax, binary_opt_single_n, all_float, mxz)
+DEF_SVE_FUNCTION (svamin, binary_opt_single_n, all_float, mxz)
+#undef REQUIRED_EXTENSIONS
diff --git a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
index 2ac6ede..013a9df 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins-sve2.h
@@ -106,7 +106,7 @@ namespace aarch64_sve
extern const function_base *const svpmullb_pair;
extern const function_base *const svpmullt;
extern const function_base *const svpmullt_pair;
- extern const function_base *const svpsel;
+ extern const function_base *const svpsel_lane;
extern const function_base *const svqabs;
extern const function_base *const svqcadd;
extern const function_base *const svqcvt;
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.cc b/gcc/config/aarch64/aarch64-sve-builtins.cc
index 5ca9ec3..44b7f6e 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.cc
+++ b/gcc/config/aarch64/aarch64-sve-builtins.cc
@@ -19,6 +19,7 @@
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -82,9 +83,8 @@ public:
/* The decl itself. */
tree decl;
- /* The architecture extensions that the function requires, as a set of
- AARCH64_FL_* flags. */
- aarch64_feature_flags required_extensions;
+ /* The architecture extensions that the function requires. */
+ aarch64_required_extensions required_extensions;
/* True if the decl represents an overloaded function that needs to be
resolved by function_resolver. */
@@ -882,11 +882,15 @@ static const predication_index preds_z[] = { PRED_z, NUM_PREDS };
/* Used by SME instructions that always merge into ZA. */
static const predication_index preds_za_m[] = { PRED_za_m, NUM_PREDS };
+#define NONSTREAMING_SVE(X) nonstreaming_only (AARCH64_FL_SVE | (X))
+#define SVE_AND_SME(X, Y) streaming_compatible (AARCH64_FL_SVE | (X), (Y))
+#define SSVE(X) SVE_AND_SME (X, X)
+
/* A list of all arm_sve.h functions. */
static CONSTEXPR const function_group_info function_groups[] = {
#define DEF_SVE_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \
{ #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_##GROUPS, \
- preds_##PREDS, REQUIRED_EXTENSIONS },
+ preds_##PREDS, aarch64_required_extensions::REQUIRED_EXTENSIONS },
#include "aarch64-sve-builtins.def"
};
@@ -894,7 +898,7 @@ static CONSTEXPR const function_group_info function_groups[] = {
static CONSTEXPR const function_group_info neon_sve_function_groups[] = {
#define DEF_NEON_SVE_FUNCTION(NAME, SHAPE, TYPES, GROUPS, PREDS) \
{ #NAME, &neon_sve_bridge_functions::NAME, &shapes::SHAPE, types_##TYPES, \
- groups_##GROUPS, preds_##PREDS, 0 },
+ groups_##GROUPS, preds_##PREDS, aarch64_required_extensions::ssve (0) },
#include "aarch64-neon-sve-bridge-builtins.def"
};
@@ -902,10 +906,12 @@ static CONSTEXPR const function_group_info neon_sve_function_groups[] = {
static CONSTEXPR const function_group_info sme_function_groups[] = {
#define DEF_SME_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \
{ #NAME, &functions::NAME, &shapes::SHAPE, types_##TYPES, groups_##GROUPS, \
- preds_##PREDS, REQUIRED_EXTENSIONS },
+ preds_##PREDS, aarch64_required_extensions::REQUIRED_EXTENSIONS },
#define DEF_SME_ZA_FUNCTION_GS(NAME, SHAPE, TYPES, GROUPS, PREDS) \
{ #NAME, &functions::NAME##_za, &shapes::SHAPE, types_##TYPES, \
- groups_##GROUPS, preds_##PREDS, (REQUIRED_EXTENSIONS | AARCH64_FL_ZA_ON) },
+ groups_##GROUPS, preds_##PREDS, \
+ aarch64_required_extensions::REQUIRED_EXTENSIONS \
+ .and_also (AARCH64_FL_ZA_ON) },
#include "aarch64-sve-builtins-sme.def"
};
@@ -1132,6 +1138,33 @@ report_not_enum (location_t location, tree fndecl, unsigned int argno,
" a valid %qT value", actual, argno + 1, fndecl, enumtype);
}
+/* Try to fold constant arguments ARG1 and ARG2 using the given tree_code.
+ Operations are not treated as overflowing. */
+static tree
+aarch64_const_binop (enum tree_code code, tree arg1, tree arg2)
+{
+ if (poly_int_tree_p (arg1) && poly_int_tree_p (arg2))
+ {
+ poly_wide_int poly_res;
+ tree type = TREE_TYPE (arg1);
+ signop sign = TYPE_SIGN (type);
+ wi::overflow_type overflow = wi::OVF_NONE;
+
+ /* Return 0 for division by 0, like SDIV and UDIV do. */
+ if (code == TRUNC_DIV_EXPR && integer_zerop (arg2))
+ return arg2;
+      /* Return 0 if the shift amount is out of range.  */
+ if (code == LSHIFT_EXPR
+ && wi::geu_p (wi::to_wide (arg2), TYPE_PRECISION (type)))
+ return build_int_cst (type, 0);
+ if (!poly_int_binop (poly_res, code, arg1, arg2, sign, &overflow))
+ return NULL_TREE;
+ return force_fit_type (type, poly_res, false,
+ TREE_OVERFLOW (arg1) | TREE_OVERFLOW (arg2));
+ }
+ return NULL_TREE;
+}
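
A scalar model of the two special cases chosen here, matching the SDIV/UDIV and shift behaviour described in the comments (the helpers are illustrative, not part of the patch):

#include <cstdint>

// Division by zero folds to zero, as the SVE UDIV/SDIV instructions return 0.
uint64_t model_udiv (uint64_t x, uint64_t y)
{
  return y == 0 ? 0 : x / y;
}

// A left shift whose amount is >= the element precision also folds to zero.
uint64_t model_lshift (uint64_t x, uint64_t amount, unsigned precision)
{
  return amount >= precision ? 0 : x << amount;
}
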
+
/* Return a hash code for a function_instance. */
hashval_t
function_instance::hash () const
@@ -1259,7 +1292,7 @@ function_builder::function_builder (handle_pragma_index pragma_index,
bool function_nulls)
{
m_overload_type = build_function_type (void_type_node, void_list_node);
- m_direct_overloads = lang_GNU_CXX ();
+ m_direct_overloads = lang_GNU_CXX () || in_lto_p;
if (initial_indexes[pragma_index] == 0)
{
@@ -1389,16 +1422,17 @@ add_shared_state_attribute (const char *name, bool is_in, bool is_out,
}
/* Return the appropriate function attributes for INSTANCE, which requires
- the feature flags in REQUIRED_EXTENSIONS. */
+ the architecture extensions in REQUIRED_EXTENSIONS. */
tree
function_builder::get_attributes (const function_instance &instance,
- aarch64_feature_flags required_extensions)
+ aarch64_required_extensions
+ required_extensions)
{
tree attrs = NULL_TREE;
- if (required_extensions & AARCH64_FL_SM_ON)
+ if (required_extensions.sm_off == 0)
attrs = add_attribute ("arm", "streaming", NULL_TREE, attrs);
- else if (!(required_extensions & AARCH64_FL_SM_OFF))
+ else if (required_extensions.sm_on != 0)
attrs = add_attribute ("arm", "streaming_compatible", NULL_TREE, attrs);
attrs = add_shared_state_attribute ("in", true, false,
@@ -1424,12 +1458,13 @@ function_builder::get_attributes (const function_instance &instance,
/* Add a function called NAME with type FNTYPE and attributes ATTRS.
INSTANCE describes what the function does and OVERLOADED_P indicates
- whether it is overloaded. REQUIRED_EXTENSIONS are the set of
- architecture extensions that the function requires. */
+ whether it is overloaded. REQUIRED_EXTENSIONS describes the architecture
+ extensions that the function requires. */
registered_function &
function_builder::add_function (const function_instance &instance,
const char *name, tree fntype, tree attrs,
- aarch64_feature_flags required_extensions,
+ aarch64_required_extensions
+ required_extensions,
bool overloaded_p,
bool placeholder_p)
{
@@ -1469,7 +1504,7 @@ function_builder::add_function (const function_instance &instance,
/* Add a built-in function for INSTANCE, with the argument types given
by ARGUMENT_TYPES and the return type given by RETURN_TYPE.
- REQUIRED_EXTENSIONS are the set of architecture extensions that the
+ REQUIRED_EXTENSIONS describes the architecture extensions that the
function requires. FORCE_DIRECT_OVERLOADS is true if there is a
one-to-one mapping between "short" and "full" names, and if standard
overload resolution therefore isn't necessary. */
@@ -1478,7 +1513,7 @@ function_builder::
add_unique_function (const function_instance &instance,
tree return_type,
vec<tree> &argument_types,
- aarch64_feature_flags required_extensions,
+ aarch64_required_extensions required_extensions,
bool force_direct_overloads)
{
/* Add the function under its full (unique) name. */
@@ -1516,7 +1551,7 @@ add_unique_function (const function_instance &instance,
}
/* Add one function decl for INSTANCE, to be used with manual overload
- resolution. REQUIRED_EXTENSIONS are the set of architecture extensions
+ resolution. REQUIRED_EXTENSIONS describes the architecture extensions
that the function requires.
For simplicity, deal with duplicate attempts to add the same function,
@@ -1527,7 +1562,7 @@ add_unique_function (const function_instance &instance,
void
function_builder::
add_overloaded_function (const function_instance &instance,
- aarch64_feature_flags required_extensions)
+ aarch64_required_extensions required_extensions)
{
auto &name_map = overload_names[m_function_nulls];
if (!name_map)
@@ -1537,8 +1572,12 @@ add_overloaded_function (const function_instance &instance,
tree id = get_identifier (name);
if (registered_function **map_value = name_map->get (id))
gcc_assert ((*map_value)->instance == instance
- && ((*map_value)->required_extensions
- & ~required_extensions) == 0);
+ && (required_extensions.sm_off == 0
+ || ((*map_value)->required_extensions.sm_off
+ & ~required_extensions.sm_off) == 0)
+ && (required_extensions.sm_on == 0
+ || ((*map_value)->required_extensions.sm_on
+ & ~required_extensions.sm_on) == 0));
else
{
registered_function &rfn
@@ -3593,6 +3632,52 @@ gimple_folder::fold_to_vl_pred (unsigned int vl)
return gimple_build_assign (lhs, builder.build ());
}
+/* Try to fold the call to a constant, given that, for integers, the call
+ is roughly equivalent to binary operation CODE. aarch64_const_binop
+ handles any differences between CODE and the intrinsic. */
+gimple *
+gimple_folder::fold_const_binary (enum tree_code code)
+{
+ gcc_assert (gimple_call_num_args (call) == 3);
+ tree pg = gimple_call_arg (call, 0);
+ tree op1 = gimple_call_arg (call, 1);
+ tree op2 = gimple_call_arg (call, 2);
+
+ if (type_suffix (0).integer_p
+ && (pred == PRED_x || is_ptrue (pg, type_suffix (0).element_bytes)))
+ if (tree res = vector_const_binop (code, op1, op2, aarch64_const_binop))
+ return gimple_build_assign (lhs, res);
+
+ return NULL;
+}
+
+/* Fold the active lanes to X and set the inactive lanes according to the
+ predication. Return the new statement. */
+gimple *
+gimple_folder::fold_active_lanes_to (tree x)
+{
+ /* If predication is _x or the predicate is ptrue, fold to X. */
+ if (pred == PRED_x
+ || is_ptrue (gimple_call_arg (call, 0), type_suffix (0).element_bytes))
+ return gimple_build_assign (lhs, x);
+
+ /* If the predication is _z or _m, calculate a vector that supplies the
+ values of inactive lanes (the first vector argument for m and a zero
+   vector for z).  */
+ tree vec_inactive;
+ if (pred == PRED_z)
+ vec_inactive = build_zero_cst (TREE_TYPE (lhs));
+ else
+ vec_inactive = gimple_call_arg (call, 1);
+ if (operand_equal_p (x, vec_inactive, 0))
+ return gimple_build_assign (lhs, x);
+
+ gimple_seq stmts = NULL;
+ tree pred = convert_pred (stmts, vector_type (0), 0);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ return gimple_build_assign (lhs, VEC_COND_EXPR, pred, x, vec_inactive);
+}
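
What this means per predication, shown for a multiply by 1 whose active lanes are simply the first operand (a sketch using arm_sve.h names):

#include <arm_sve.h>

// _x: inactive lanes are undefined, so the call folds straight to x.
svint32_t mul1_x (svbool_t pg, svint32_t x)
{
  return x;
}

// _z: inactive lanes must be zero, giving a select against a zero vector.
svint32_t mul1_z (svbool_t pg, svint32_t x)
{
  return svsel_s32 (pg, x, svdup_s32 (0));
}

// _m: inactive lanes keep the first vector operand, which here is x itself,
// so the whole result collapses to x (the operand_equal_p shortcut above).
svint32_t mul1_m (svbool_t pg, svint32_t x)
{
  return x;
}
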
+
/* Try to fold the call. Return the new statement on success and null
on failure. */
gimple *
@@ -3647,6 +3732,21 @@ function_expander::direct_optab_handler_for_sign (optab signed_op,
return ::direct_optab_handler (op, mode);
}
+/* Choose between signed and unsigned convert optabs SIGNED_OP and
+ UNSIGNED_OP based on the signedness of type suffix SUFFIX_I, then
+ pick the appropriate optab handler for "converting" from FROM_MODE
+ to TO_MODE. */
+insn_code
+function_expander::convert_optab_handler_for_sign (optab signed_op,
+ optab unsigned_op,
+ unsigned int suffix_i,
+ machine_mode to_mode,
+ machine_mode from_mode)
+{
+ optab op = type_suffix (suffix_i).unsigned_p ? unsigned_op : signed_op;
+ return ::convert_optab_handler (op, to_mode, from_mode);
+}
+
/* Return true if X overlaps any input. */
bool
function_expander::overlaps_input_p (rtx x)
diff --git a/gcc/config/aarch64/aarch64-sve-builtins.h b/gcc/config/aarch64/aarch64-sve-builtins.h
index 9ab6f20..d5cc6e0 100644
--- a/gcc/config/aarch64/aarch64-sve-builtins.h
+++ b/gcc/config/aarch64/aarch64-sve-builtins.h
@@ -363,9 +363,8 @@ struct function_group_info
const group_suffix_index *groups;
const predication_index *preds;
- /* The architecture extensions that the functions require, as a set of
- AARCH64_FL_* flags. */
- aarch64_feature_flags required_extensions;
+ /* The architecture extensions that the functions require. */
+ aarch64_required_extensions required_extensions;
};
/* Describes a single fully-resolved function (i.e. one that has a
@@ -432,9 +431,9 @@ public:
~function_builder ();
void add_unique_function (const function_instance &, tree,
- vec<tree> &, aarch64_feature_flags, bool);
+ vec<tree> &, aarch64_required_extensions, bool);
void add_overloaded_function (const function_instance &,
- aarch64_feature_flags);
+ aarch64_required_extensions);
void add_overloaded_functions (const function_group_info &,
mode_suffix_index);
@@ -446,11 +445,11 @@ private:
char *get_name (const function_instance &, bool);
- tree get_attributes (const function_instance &, aarch64_feature_flags);
+ tree get_attributes (const function_instance &, aarch64_required_extensions);
registered_function &add_function (const function_instance &,
const char *, tree, tree,
- aarch64_feature_flags, bool, bool);
+ aarch64_required_extensions, bool, bool);
/* The function type to use for functions that are resolved by
function_resolver. */
@@ -636,6 +635,8 @@ public:
gimple *fold_to_pfalse ();
gimple *fold_to_ptrue ();
gimple *fold_to_vl_pred (unsigned int);
+ gimple *fold_const_binary (enum tree_code);
+ gimple *fold_active_lanes_to (tree);
gimple *fold ();
@@ -659,6 +660,8 @@ public:
insn_code direct_optab_handler (optab, unsigned int = 0);
insn_code direct_optab_handler_for_sign (optab, optab, unsigned int = 0,
machine_mode = E_VOIDmode);
+ insn_code convert_optab_handler_for_sign (optab, optab, unsigned int,
+ machine_mode, machine_mode);
machine_mode result_mode () const;
diff --git a/gcc/config/aarch64/aarch64-sve.md b/gcc/config/aarch64/aarch64-sve.md
index a5cd42b..06bd3e4 100644
--- a/gcc/config/aarch64/aarch64-sve.md
+++ b/gcc/config/aarch64/aarch64-sve.md
@@ -3088,6 +3088,23 @@
;; - NOT
;; -------------------------------------------------------------------------
+(define_expand "ctz<mode>2"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (unspec:SVE_I
+ [(match_dup 2)
+ (ctz:SVE_I
+ (match_operand:SVE_I 1 "register_operand"))]
+ UNSPEC_PRED_X))]
+ "TARGET_SVE"
+ {
+ rtx pred = aarch64_ptrue_reg (<VPRED>mode);
+ rtx temp = gen_reg_rtx (<MODE>mode);
+ emit_insn (gen_aarch64_pred_rbit<mode> (temp, pred, operands[1]));
+ emit_insn (gen_aarch64_pred_clz<mode> (operands[0], pred, temp));
+ DONE;
+ }
+)
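
The expansion computes ctz as clz of the bit-reversed input; a scalar sanity check of that identity:

#include <cstdint>
#include <cassert>

// Reversing the bits turns trailing zeros into leading zeros.
uint32_t bit_reverse (uint32_t x)
{
  uint32_t r = 0;
  for (int i = 0; i < 32; ++i)
    r |= ((x >> i) & 1u) << (31 - i);
  return r;
}

int main ()
{
  for (uint32_t x = 1; x < 100000; ++x)
    assert (__builtin_ctz (x) == __builtin_clz (bit_reverse (x)));
  return 0;
}
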
+
;; Unpredicated integer unary arithmetic.
(define_expand "<optab><mode>2"
[(set (match_operand:SVE_I 0 "register_operand")
@@ -4816,11 +4833,23 @@
;; Unpredicated shift operations by a constant (post-RA only).
;; These are generated by splitting a predicated instruction whose
;; predicate is unused.
-(define_insn "*post_ra_v<optab><mode>3"
+(define_insn "*post_ra_v_ashl<mode>3"
+ [(set (match_operand:SVE_I 0 "register_operand")
+ (ashift:SVE_I
+ (match_operand:SVE_I 1 "register_operand")
+ (match_operand:SVE_I 2 "aarch64_simd_lshift_imm")))]
+ "TARGET_SVE && reload_completed"
+ {@ [ cons: =0 , 1 , 2 ]
+ [ w , w , vs1 ] add\t%0.<Vetype>, %1.<Vetype>, %1.<Vetype>
+ [ w , w , Dl ] lsl\t%0.<Vetype>, %1.<Vetype>, #%2
+ }
+)
+
+(define_insn "*post_ra_v_<optab><mode>3"
[(set (match_operand:SVE_I 0 "register_operand" "=w")
- (ASHIFT:SVE_I
+ (SHIFTRT:SVE_I
(match_operand:SVE_I 1 "register_operand" "w")
- (match_operand:SVE_I 2 "aarch64_simd_<lr>shift_imm")))]
+ (match_operand:SVE_I 2 "aarch64_simd_rshift_imm")))]
"TARGET_SVE && reload_completed"
"<shift>\t%0.<Vetype>, %1.<Vetype>, #%2"
)
@@ -6443,10 +6472,10 @@
;; by providing this, but we need to use UNSPECs since rtx logical ops
;; aren't defined for floating-point modes.
(define_insn "*<optab><mode>3"
- [(set (match_operand:SVE_FULL_F 0 "register_operand" "=w")
- (unspec:SVE_FULL_F
- [(match_operand:SVE_FULL_F 1 "register_operand" "w")
- (match_operand:SVE_FULL_F 2 "register_operand" "w")]
+ [(set (match_operand:SVE_F 0 "register_operand" "=w")
+ (unspec:SVE_F
+ [(match_operand:SVE_F 1 "register_operand" "w")
+ (match_operand:SVE_F 2 "register_operand" "w")]
LOGICALF))]
"TARGET_SVE"
"<logicalf_op>\t%0.d, %1.d, %2.d"
@@ -6588,39 +6617,6 @@
;; - FMINNM
;; -------------------------------------------------------------------------
-;; Unpredicated fmax/fmin (the libm functions). The optabs for the
-;; smax/smin rtx codes are handled in the generic section above.
-(define_expand "<fmaxmin><mode>3"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_dup 3)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 1 "register_operand")
- (match_operand:SVE_FULL_F 2 "aarch64_sve_float_maxmin_operand")]
- SVE_COND_FP_MAXMIN_PUBLIC))]
- "TARGET_SVE"
- {
- operands[3] = aarch64_ptrue_reg (<VPRED>mode);
- }
-)
-
-;; Predicated fmax/fmin (the libm functions). The optabs for the
-;; smax/smin rtx codes are handled in the generic section above.
-(define_expand "cond_<fmaxmin><mode>"
- [(set (match_operand:SVE_FULL_F 0 "register_operand")
- (unspec:SVE_FULL_F
- [(match_operand:<VPRED> 1 "register_operand")
- (unspec:SVE_FULL_F
- [(match_dup 1)
- (const_int SVE_RELAXED_GP)
- (match_operand:SVE_FULL_F 2 "register_operand")
- (match_operand:SVE_FULL_F 3 "aarch64_sve_float_maxmin_operand")]
- SVE_COND_FP_MAXMIN_PUBLIC)
- (match_operand:SVE_FULL_F 4 "aarch64_simd_reg_or_zero")]
- UNSPEC_SEL))]
- "TARGET_SVE"
-)
-
;; Predicated floating-point maximum/minimum.
(define_insn "@aarch64_pred_<optab><mode>"
[(set (match_operand:SVE_FULL_F 0 "register_operand")
@@ -7197,7 +7193,7 @@
;; -------------------------------------------------------------------------
;; Four-element integer dot-product with accumulation.
-(define_insn "<sur>dot_prod<vsi2qi>"
+(define_insn "<sur>dot_prod<mode><vsi2qi>"
[(set (match_operand:SVE_FULL_SDI 0 "register_operand")
(plus:SVE_FULL_SDI
(unspec:SVE_FULL_SDI
@@ -7235,7 +7231,7 @@
}
)
-(define_insn "@<sur>dot_prod<vsi2qi>"
+(define_insn "@<sur>dot_prod<mode><vsi2qi>"
[(set (match_operand:VNx4SI_ONLY 0 "register_operand")
(plus:VNx4SI_ONLY
(unspec:VNx4SI_ONLY
@@ -7293,7 +7289,8 @@
rtx ones = force_reg (<VSI2QI>mode, CONST1_RTX (<VSI2QI>mode));
rtx diff = gen_reg_rtx (<VSI2QI>mode);
emit_insn (gen_<su>abd<vsi2qi>3 (diff, operands[1], operands[2]));
- emit_insn (gen_udot_prod<vsi2qi> (operands[0], diff, ones, operands[3]));
+ emit_insn (gen_udot_prod<mode><vsi2qi> (operands[0], diff, ones,
+ operands[3]));
DONE;
}
)
diff --git a/gcc/config/aarch64/aarch64-sve2.md b/gcc/config/aarch64/aarch64-sve2.md
index 972b03a..8047f40 100644
--- a/gcc/config/aarch64/aarch64-sve2.md
+++ b/gcc/config/aarch64/aarch64-sve2.md
@@ -1266,18 +1266,28 @@
;; - XAR
;; -------------------------------------------------------------------------
+;; Also allow the Advanced SIMD modes as the SVE2 XAR instruction
+;; can handle more element sizes than the TARGET_SHA3 one from Advanced SIMD.
+;; Don't allow the V2DImode use here unless !TARGET_SHA3, as the Advanced SIMD
+;; version should be preferred when available: it is non-destructive on its
+;; input.
(define_insn "@aarch64_sve2_xar<mode>"
- [(set (match_operand:SVE_FULL_I 0 "register_operand")
- (rotatert:SVE_FULL_I
- (xor:SVE_FULL_I
- (match_operand:SVE_FULL_I 1 "register_operand")
- (match_operand:SVE_FULL_I 2 "register_operand"))
- (match_operand:SVE_FULL_I 3 "aarch64_simd_rshift_imm")))]
- "TARGET_SVE2"
- {@ [ cons: =0 , 1 , 2 ; attrs: movprfx ]
- [ w , %0 , w ; * ] xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
- [ ?&w , w , w ; yes ] movprfx\t%0, %1\;xar\t%0.<Vetype>, %0.<Vetype>, %2.<Vetype>, #%3
+ [(set (match_operand:SVE_ASIMD_FULL_I 0 "register_operand" "=w,?&w")
+ (rotate:SVE_ASIMD_FULL_I
+ (xor:SVE_ASIMD_FULL_I
+ (match_operand:SVE_ASIMD_FULL_I 1 "register_operand" "%0,w")
+ (match_operand:SVE_ASIMD_FULL_I 2 "register_operand" "w,w"))
+ (match_operand:SVE_ASIMD_FULL_I 3 "aarch64_simd_lshift_imm")))]
+ "TARGET_SVE2 && !(<MODE>mode == V2DImode && TARGET_SHA3)"
+ {
+ operands[3]
+ = GEN_INT (GET_MODE_UNIT_BITSIZE (<MODE>mode)
+ - INTVAL (unwrap_const_vec_duplicate (operands[3])));
+ if (which_alternative == 0)
+ return "xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
+ return "movprfx\t%Z0, %Z1\;xar\t%Z0.<Vetype>, %Z0.<Vetype>, %Z2.<Vetype>, #%3";
}
+ [(set_attr "movprfx" "*,yes")]
)
;; -------------------------------------------------------------------------
@@ -2021,7 +2031,7 @@
)
;; Two-way dot-product.
-(define_insn "@aarch64_sve_<sur>dotvnx4sivnx8hi"
+(define_insn "<sur>dot_prodvnx4sivnx8hi"
[(set (match_operand:VNx4SI 0 "register_operand")
(plus:VNx4SI
(unspec:VNx4SI
@@ -2467,6 +2477,43 @@
[(set_attr "movprfx" "yes")]
)
+;; -------------------------------------------------------------------------
+;; -- [FP] Absolute maximum and minimum
+;; -------------------------------------------------------------------------
+;; Includes:
+;; - FAMAX
+;; - FAMIN
+;; -------------------------------------------------------------------------
+;; Predicated floating-point absolute maximum and minimum.
+(define_insn_and_rewrite "*aarch64_pred_faminmax_fused"
+ [(set (match_operand:SVE_FULL_F 0 "register_operand")
+ (unspec:SVE_FULL_F
+ [(match_operand:<VPRED> 1 "register_operand")
+ (match_operand:SI 4 "aarch64_sve_gp_strictness")
+ (unspec:SVE_FULL_F
+ [(match_operand 5)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_FULL_F 2 "register_operand")]
+ UNSPEC_COND_FABS)
+ (unspec:SVE_FULL_F
+ [(match_operand 6)
+ (const_int SVE_RELAXED_GP)
+ (match_operand:SVE_FULL_F 3 "register_operand")]
+ UNSPEC_COND_FABS)]
+ SVE_COND_SMAXMIN))]
+ "TARGET_SVE_FAMINMAX"
+ {@ [ cons: =0 , 1 , 2 , 3 ; attrs: movprfx ]
+ [ w , Upl , %0 , w ; * ] <faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ [ ?&w , Upl , w , w ; yes ] movprfx\t%0, %2\;<faminmax_cond_uns_op>\t%0.<Vetype>, %1/m, %0.<Vetype>, %3.<Vetype>
+ }
+ "&& (!rtx_equal_p (operands[1], operands[5])
+ || !rtx_equal_p (operands[1], operands[6]))"
+ {
+ operands[5] = copy_rtx (operands[1]);
+ operands[6] = copy_rtx (operands[1]);
+ }
+)
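
Ignoring NaN and signed-zero details, the fused operation is the maximum (or minimum) of the absolute values of the two inputs; a scalar sketch:

#include <cmath>

// Roughly what the FABS + FMAX/FMIN combination above computes per lane.
float famax_scalar (float a, float b)
{
  return std::fmax (std::fabs (a), std::fabs (b));
}

float famin_scalar (float a, float b)
{
  return std::fmin (std::fabs (a), std::fabs (b));
}
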
+
;; =========================================================================
;; == Complex arithmetic
;; =========================================================================
diff --git a/gcc/config/aarch64/aarch64-tune.md b/gcc/config/aarch64/aarch64-tune.md
index 4fce0c5..4423a99f 100644
--- a/gcc/config/aarch64/aarch64-tune.md
+++ b/gcc/config/aarch64/aarch64-tune.md
@@ -1,5 +1,5 @@
;; -*- buffer-read-only: t -*-
;; Generated automatically by gentune.sh from aarch64-cores.def
(define_attr "tune"
- "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,generic,generic_armv8_a,generic_armv9_a"
+ "cortexa34,cortexa35,cortexa53,cortexa57,cortexa72,cortexa73,thunderx,thunderxt88,thunderxt88p1,octeontx,octeontxt81,octeontxt83,thunderxt81,thunderxt83,ampere1,ampere1a,ampere1b,emag,xgene1,falkor,qdf24xx,exynosm1,phecda,thunderx2t99p1,vulcan,thunderx2t99,cortexa55,cortexa75,cortexa76,cortexa76ae,cortexa77,cortexa78,cortexa78ae,cortexa78c,cortexa65,cortexa65ae,cortexx1,cortexx1c,neoversen1,ares,neoversee1,octeontx2,octeontx2t98,octeontx2t96,octeontx2t93,octeontx2f95,octeontx2f95n,octeontx2f95mm,a64fx,fujitsu_monaka,tsv110,thunderx3t110,neoversev1,zeus,neoverse512tvb,saphira,oryon1,cortexa57cortexa53,cortexa72cortexa53,cortexa73cortexa35,cortexa73cortexa53,cortexa75cortexa55,cortexa76cortexa55,cortexr82,cortexa510,cortexa520,cortexa710,cortexa715,cortexa720,cortexa725,cortexx2,cortexx3,cortexx4,cortexx925,neoversen2,cobalt100,neoversen3,neoversev2,grace,neoversev3,neoversev3ae,demeter,generic,generic_armv8_a,generic_armv9_a"
(const (symbol_ref "((enum attr_tune) aarch64_tune)")))
diff --git a/gcc/config/aarch64/aarch64.cc b/gcc/config/aarch64/aarch64.cc
index 40dacfc..9347e06 100644
--- a/gcc/config/aarch64/aarch64.cc
+++ b/gcc/config/aarch64/aarch64.cc
@@ -22,6 +22,7 @@
#define INCLUDE_STRING
#define INCLUDE_ALGORITHM
+#define INCLUDE_MEMORY
#define INCLUDE_VECTOR
#include "config.h"
#include "system.h"
@@ -59,6 +60,7 @@
#include "opts.h"
#include "gimplify.h"
#include "dwarf2.h"
+#include "dwarf2out.h"
#include "gimple-iterator.h"
#include "tree-vectorizer.h"
#include "aarch64-cost-tables.h"
@@ -127,10 +129,19 @@ constexpr auto AARCH64_STATE_SHARED = 1U << 0;
constexpr auto AARCH64_STATE_IN = 1U << 1;
constexpr auto AARCH64_STATE_OUT = 1U << 2;
+/* Enum to distinguish which type of check is to be done in
+ aarch64_simd_valid_imm. */
+enum simd_immediate_check {
+ AARCH64_CHECK_MOV,
+ AARCH64_CHECK_ORR,
+ AARCH64_CHECK_AND,
+ AARCH64_CHECK_XOR
+};
+
/* Information about a legitimate vector immediate operand. */
struct simd_immediate_info
{
- enum insn_type { MOV, MVN, INDEX, PTRUE };
+ enum insn_type { MOV, MVN, INDEX, PTRUE, SVE_MOV };
enum modifier_type { LSL, MSL };
simd_immediate_info () {}
@@ -415,6 +426,7 @@ static const struct aarch64_flag_desc aarch64_tuning_flags[] =
#include "tuning_models/neoversev3.h"
#include "tuning_models/neoversev3ae.h"
#include "tuning_models/a64fx.h"
+#include "tuning_models/fujitsu_monaka.h"
/* Support for fine-grained override of the tuning structures. */
struct aarch64_tuning_override_function
@@ -593,14 +605,10 @@ aarch64_lookup_shared_state_flags (tree attrs, const char *state_name)
{
for (tree attr = attrs; attr; attr = TREE_CHAIN (attr))
{
- if (!cxx11_attribute_p (attr))
- continue;
-
- auto ns = IDENTIFIER_POINTER (TREE_PURPOSE (TREE_PURPOSE (attr)));
- if (strcmp (ns, "arm") != 0)
+ if (!is_attribute_namespace_p ("arm", attr))
continue;
- auto attr_name = IDENTIFIER_POINTER (TREE_VALUE (TREE_PURPOSE (attr)));
+ auto attr_name = IDENTIFIER_POINTER (get_attribute_name (attr));
auto flags = aarch64_attribute_shared_state_flags (attr_name);
if (!flags)
continue;
@@ -1083,7 +1091,7 @@ pure_scalable_type_info::analyze_array (const_tree type)
/* An array of unknown, flexible or variable length will be passed and
returned by reference whatever we do. */
- tree nelts_minus_one = array_type_nelts (type);
+ tree nelts_minus_one = array_type_nelts_minus_one (type);
if (!tree_fits_uhwi_p (nelts_minus_one))
return DOESNT_MATTER;
@@ -1453,6 +1461,32 @@ aarch64_dwarf_frame_reg_mode (int regno)
return default_dwarf_frame_reg_mode (regno);
}
+/* Implement TARGET_OUTPUT_CFI_DIRECTIVE. */
+static bool
+aarch64_output_cfi_directive (FILE *f, dw_cfi_ref cfi)
+{
+ bool found = false;
+ if (cfi->dw_cfi_opc == DW_CFA_AARCH64_negate_ra_state)
+ {
+ fprintf (f, "\t.cfi_negate_ra_state\n");
+ found = true;
+ }
+ return found;
+}
+
+/* Implement TARGET_DW_CFI_OPRND1_DESC. */
+static bool
+aarch64_dw_cfi_oprnd1_desc (dwarf_call_frame_info cfi_opc,
+ dw_cfi_oprnd_type &oprnd_type)
+{
+ if (cfi_opc == DW_CFA_AARCH64_negate_ra_state)
+ {
+ oprnd_type = dw_cfi_oprnd_unused;
+ return true;
+ }
+ return false;
+}
+
/* If X is a CONST_DOUBLE, return its bit representation as a constant
integer, otherwise return X unmodified. */
static rtx
@@ -1909,6 +1943,46 @@ aarch64_sve_int_mode (machine_mode mode)
return aarch64_sve_data_mode (int_mode, GET_MODE_NUNITS (mode)).require ();
}
+/* Look for a vector mode with the same classification as VEC_MODE,
+ but with each group of FACTOR elements coalesced into a single element.
+ In other words, look for a mode in which the elements are FACTOR times
+ larger and in which the number of elements is FACTOR times smaller.
+
+ Return the mode found, if one exists. */
+
+static opt_machine_mode
+aarch64_coalesce_units (machine_mode vec_mode, unsigned int factor)
+{
+ auto elt_bits = vector_element_size (GET_MODE_BITSIZE (vec_mode),
+ GET_MODE_NUNITS (vec_mode));
+ auto vec_flags = aarch64_classify_vector_mode (vec_mode);
+ if (vec_flags & VEC_SVE_PRED)
+ {
+ if (known_eq (GET_MODE_SIZE (vec_mode), BYTES_PER_SVE_PRED))
+ return aarch64_sve_pred_mode (elt_bits * factor);
+ return {};
+ }
+
+ scalar_mode new_elt_mode;
+ if (!int_mode_for_size (elt_bits * factor, false).exists (&new_elt_mode))
+ return {};
+
+ if (vec_flags == VEC_ADVSIMD)
+ {
+ auto mode = aarch64_simd_container_mode (new_elt_mode,
+ GET_MODE_BITSIZE (vec_mode));
+ if (mode != word_mode)
+ return mode;
+ }
+ else if (vec_flags & VEC_SVE_DATA)
+ {
+ poly_uint64 new_nunits;
+ if (multiple_p (GET_MODE_NUNITS (vec_mode), factor, &new_nunits))
+ return aarch64_sve_data_mode (new_elt_mode, new_nunits);
+ }
+ return {};
+}
+
/* Implement TARGET_VECTORIZE_RELATED_MODE. */
static opt_machine_mode
@@ -3557,6 +3631,27 @@ aarch64_ptrue_reg (machine_mode mode)
return gen_lowpart (mode, reg);
}
+/* Return an all-true (restricted to the leading VL bits) predicate register of
+ mode MODE. */
+
+rtx
+aarch64_ptrue_reg (machine_mode mode, unsigned int vl)
+{
+ gcc_assert (aarch64_sve_pred_mode_p (mode));
+
+ rtx_vector_builder builder (VNx16BImode, vl, 2);
+
+ for (unsigned i = 0; i < vl; i++)
+ builder.quick_push (CONST1_RTX (BImode));
+
+ for (unsigned i = 0; i < vl; i++)
+ builder.quick_push (CONST0_RTX (BImode));
+
+ rtx const_vec = builder.build ();
+ rtx reg = force_reg (VNx16BImode, const_vec);
+ return gen_lowpart (mode, reg);
+}
+
/* Return an all-false predicate register of mode MODE. */
rtx
@@ -5593,7 +5688,7 @@ aarch64_expand_sve_const_vector (rtx target, rtx src)
builder.quick_push (CONST_VECTOR_ENCODED_ELT (src, srci));
}
rtx vq_src = builder.build ();
- if (aarch64_simd_valid_immediate (vq_src, NULL))
+ if (aarch64_simd_valid_mov_imm (vq_src))
{
vq_src = force_reg (vq_mode, vq_src);
return aarch64_expand_sve_dupq (target, mode, vq_src);
@@ -6105,8 +6200,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
}
}
- if (GET_CODE (imm) == HIGH
- || aarch64_simd_valid_immediate (imm, NULL))
+ if (GET_CODE (imm) == HIGH || aarch64_simd_valid_mov_imm (imm))
{
emit_insn (gen_rtx_SET (dest, imm));
return;
@@ -9012,7 +9106,7 @@ aarch64_process_components (sbitmap components, bool prologue_p)
{
bool frame_related_p = aarch64_emit_cfi_for_reg_p (regno);
machine_mode mode = aarch64_reg_save_mode (regno);
-
+
rtx reg = gen_rtx_REG (mode, regno);
poly_int64 offset = frame.reg_offset[regno];
if (frame_pointer_needed)
@@ -9615,7 +9709,7 @@ aarch64_expand_prologue (void)
default:
gcc_unreachable ();
}
- add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
+ add_reg_note (insn, REG_CFA_NEGATE_RA_STATE, const0_rtx);
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -10036,7 +10130,7 @@ aarch64_expand_epilogue (rtx_call_insn *sibcall)
default:
gcc_unreachable ();
}
- add_reg_note (insn, REG_CFA_TOGGLE_RA_MANGLE, const0_rtx);
+ add_reg_note (insn, REG_CFA_NEGATE_RA_STATE, const0_rtx);
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -10197,7 +10291,7 @@ aarch64_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
/* Implement TARGET_CASE_VALUES_THRESHOLD.
The expansion for a table switch is quite expensive due to the number
of instructions, the table lookup and hard to predict indirect jump.
- When optimizing for speed, and -O3 enabled, use the per-core tuning if
+ When optimizing for speed, and -O3 enabled, use the per-core tuning if
set, otherwise use tables for >= 11 cases as a tradeoff between size and
performance. When optimizing for size, use 8 for smallest codesize. */
@@ -11048,7 +11142,7 @@ aarch64_can_const_movi_rtx_p (rtx x, machine_mode mode)
vmode = aarch64_simd_container_mode (imode, width);
rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, ival);
- return aarch64_simd_valid_immediate (v_op, NULL);
+ return aarch64_simd_valid_mov_imm (v_op);
}
@@ -12829,7 +12923,7 @@ aarch64_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x,
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
if (reg_class_subset_p (rclass, FP_REGS)
&& !((REG_P (x) && HARD_REGISTER_P (x))
- || aarch64_simd_valid_immediate (x, NULL))
+ || aarch64_simd_valid_mov_imm (x))
&& mode != VNx16QImode
&& (vec_flags & VEC_SVE_DATA)
&& ((vec_flags & VEC_PARTIAL) || BYTES_BIG_ENDIAN))
@@ -14193,7 +14287,7 @@ aarch64_rtx_costs (rtx x, machine_mode mode, int outer ATTRIBUTE_UNUSED,
/* BFM. */
if (speed)
*cost += extra_cost->alu.bfi;
- *cost += rtx_cost (op1, VOIDmode, (enum rtx_code) code, 1, speed);
+ *cost += rtx_cost (op1, VOIDmode, code, 1, speed);
}
return true;
@@ -14573,8 +14667,7 @@ cost_minus:
*cost += extra_cost->alu.extend_arith;
op1 = aarch64_strip_extend (op1, true);
- *cost += rtx_cost (op1, VOIDmode,
- (enum rtx_code) GET_CODE (op1), 0, speed);
+ *cost += rtx_cost (op1, VOIDmode, GET_CODE (op1), 0, speed);
return true;
}
@@ -14585,9 +14678,7 @@ cost_minus:
|| aarch64_shift_p (GET_CODE (new_op1)))
&& code != COMPARE)
{
- *cost += aarch64_rtx_mult_cost (new_op1, MULT,
- (enum rtx_code) code,
- speed);
+ *cost += aarch64_rtx_mult_cost (new_op1, MULT, code, speed);
return true;
}
@@ -14688,8 +14779,7 @@ cost_plus:
*cost += extra_cost->alu.extend_arith;
op0 = aarch64_strip_extend (op0, true);
- *cost += rtx_cost (op0, VOIDmode,
- (enum rtx_code) GET_CODE (op0), 0, speed);
+ *cost += rtx_cost (op0, VOIDmode, GET_CODE (op0), 0, speed);
return true;
}
@@ -14803,8 +14893,7 @@ cost_plus:
&& aarch64_mask_and_shift_for_ubfiz_p (int_mode, op1,
XEXP (op0, 1)))
{
- *cost += rtx_cost (XEXP (op0, 0), int_mode,
- (enum rtx_code) code, 0, speed);
+ *cost += rtx_cost (XEXP (op0, 0), int_mode, code, 0, speed);
if (speed)
*cost += extra_cost->alu.bfx;
@@ -14814,8 +14903,7 @@ cost_plus:
{
/* We possibly get the immediate for free, this is not
modelled. */
- *cost += rtx_cost (op0, int_mode,
- (enum rtx_code) code, 0, speed);
+ *cost += rtx_cost (op0, int_mode, code, 0, speed);
if (speed)
*cost += extra_cost->alu.logical;
@@ -14850,10 +14938,8 @@ cost_plus:
}
/* In both cases we want to cost both operands. */
- *cost += rtx_cost (new_op0, int_mode, (enum rtx_code) code,
- 0, speed);
- *cost += rtx_cost (op1, int_mode, (enum rtx_code) code,
- 1, speed);
+ *cost += rtx_cost (new_op0, int_mode, code, 0, speed);
+ *cost += rtx_cost (op1, int_mode, code, 1, speed);
return true;
}
@@ -14874,7 +14960,7 @@ cost_plus:
/* MVN-shifted-reg. */
if (op0 != x)
{
- *cost += rtx_cost (op0, mode, (enum rtx_code) code, 0, speed);
+ *cost += rtx_cost (op0, mode, code, 0, speed);
if (speed)
*cost += extra_cost->alu.log_shift;
@@ -14890,7 +14976,7 @@ cost_plus:
rtx newop1 = XEXP (op0, 1);
rtx op0_stripped = aarch64_strip_shift (newop0);
- *cost += rtx_cost (newop1, mode, (enum rtx_code) code, 1, speed);
+ *cost += rtx_cost (newop1, mode, code, 1, speed);
*cost += rtx_cost (op0_stripped, mode, XOR, 0, speed);
if (speed)
@@ -15056,7 +15142,7 @@ cost_plus:
&& known_eq (INTVAL (XEXP (op1, 1)),
GET_MODE_BITSIZE (mode) - 1))
{
- *cost += rtx_cost (op0, mode, (rtx_code) code, 0, speed);
+ *cost += rtx_cost (op0, mode, code, 0, speed);
/* We already demanded XEXP (op1, 0) to be REG_P, so
don't recurse into it. */
return true;
@@ -15119,7 +15205,7 @@ cost_plus:
/* We can trust that the immediates used will be correct (there
are no by-register forms), so we need only cost op0. */
- *cost += rtx_cost (XEXP (x, 0), VOIDmode, (enum rtx_code) code, 0, speed);
+ *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed);
return true;
case MULT:
@@ -15309,12 +15395,11 @@ cost_plus:
&& aarch64_vec_fpconst_pow_of_2 (XEXP (x, 1)) > 0)
|| aarch64_fpconst_pow_of_2 (XEXP (x, 1)) > 0))
{
- *cost += rtx_cost (XEXP (x, 0), VOIDmode, (rtx_code) code,
- 0, speed);
+ *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed);
return true;
}
- *cost += rtx_cost (x, VOIDmode, (enum rtx_code) code, 0, speed);
+ *cost += rtx_cost (x, VOIDmode, code, 0, speed);
return true;
case ABS:
@@ -15415,7 +15500,7 @@ cost_plus:
case CONST_VECTOR:
{
/* Load using MOVI/MVNI. */
- if (aarch64_simd_valid_immediate (x, NULL))
+ if (aarch64_simd_valid_mov_imm (x))
*cost = extra_cost->vect.movi;
else /* Load using constant pool. */
*cost = extra_cost->ldst.load;
@@ -15501,6 +15586,12 @@ aarch64_register_move_cost (machine_mode mode,
reg_class_contents[FFR_REGS]))
return 80;
+ /* Moves to/from sysregs are expensive, and must go via GPR. */
+ if (from == MOVEABLE_SYSREGS)
+ return 80 + aarch64_register_move_cost (mode, GENERAL_REGS, to);
+ if (to == MOVEABLE_SYSREGS)
+ return 80 + aarch64_register_move_cost (mode, from, GENERAL_REGS);
+
/* Moving between GPR and stack cost is the same as GP2GP. */
if ((from == GENERAL_REGS && to == STACK_REG)
|| (to == GENERAL_REGS && from == STACK_REG))
@@ -15928,6 +16019,44 @@ aarch64_emit_approx_div (rtx quo, rtx num, rtx den)
return true;
}
+/* Emit an optimized sequence to perform a vector rotate
+ of REG by the vector constant amount AMNT_VEC and place the result
+ in DST. Return true iff successful. */
+
+bool
+aarch64_emit_opt_vec_rotate (rtx dst, rtx reg, rtx amnt_vec)
+{
+ rtx amnt = unwrap_const_vec_duplicate (amnt_vec);
+ gcc_assert (CONST_INT_P (amnt));
+ HOST_WIDE_INT rotamnt = UINTVAL (amnt);
+ machine_mode mode = GET_MODE (reg);
+ /* Rotates by half the element width map down to REV* instructions and should
+ always be preferred when possible. */
+ if (rotamnt == GET_MODE_UNIT_BITSIZE (mode) / 2
+ && expand_rotate_as_vec_perm (mode, dst, reg, amnt))
+ return true;
+ /* 64 and 128-bit vector modes can use the XAR instruction
+ when available. */
+ else if (can_create_pseudo_p ()
+ && ((TARGET_SHA3 && mode == V2DImode)
+ || (TARGET_SVE2
+ && (known_eq (GET_MODE_SIZE (mode), 8)
+ || known_eq (GET_MODE_SIZE (mode), 16)))))
+ {
+ rtx zeroes = aarch64_gen_shareable_zero (mode);
+ rtx xar_op
+ = gen_rtx_ROTATE (mode, gen_rtx_XOR (mode, reg, zeroes),
+ amnt_vec);
+ emit_set_insn (dst, xar_op);
+ return true;
+ }
+ /* If none of the above, try to expand rotates by any byte amount as
+ permutes. */
+ else if (expand_rotate_as_vec_perm (mode, dst, reg, amnt))
+ return true;
+ return false;
+}
+
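As a quick standalone illustration (not part of the patch), the first branch above works because rotating an element by half its width is just a lane permute: for 32-bit elements, a rotate by 16 swaps the two 16-bit halves, which is the REV32-style permute that expand_rotate_as_vec_perm can emit. The XAR branch relies on rotate (x ^ 0, amt) == rotate (x, amt), with the zero coming from a shared zero register.

    #include <cassert>
    #include <cstdint>

    static uint32_t rot16 (uint32_t x)
    {
      /* Rotate a 32-bit element by half its width (16 bits).  */
      return (x << 16) | (x >> 16);
    }

    static uint32_t swap_halves (uint32_t x)
    {
      /* The lane permute a REV32-style instruction performs on .h lanes.  */
      return (x >> 16) | ((x & 0xffffu) << 16);
    }

    int main ()
    {
      const uint32_t tests[] = { 0x12345678u, 0xdeadbeefu, 0u, 0xffffffffu };
      for (uint32_t x : tests)
        assert (rot16 (x) == swap_halves (x));
      return 0;
    }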
/* Return the number of instructions that can be issued per cycle. */
static int
aarch64_sched_issue_rate (void)
@@ -16214,7 +16343,7 @@ public:
private:
void record_potential_advsimd_unrolling (loop_vec_info);
void analyze_loop_vinfo (loop_vec_info);
- void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info,
+ void count_ops (unsigned int, vect_cost_for_stmt, stmt_vec_info, slp_tree,
aarch64_vec_op_count *);
fractional_cost adjust_body_cost_sve (const aarch64_vec_op_count *,
fractional_cost, unsigned int,
@@ -16531,11 +16660,13 @@ aarch64_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
}
}
-/* Return true if an access of kind KIND for STMT_INFO represents one
- vector of an LD[234] or ST[234] operation. Return the total number of
- vectors (2, 3 or 4) if so, otherwise return a value outside that range. */
+/* Check whether an access of kind KIND for STMT_INFO (or NODE if SLP)
+ represents one vector of an LD[234] or ST[234] operation. Return the total
+ number of vectors (2, 3 or 4) if so, otherwise return a value outside that
+ range. */
static int
-aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info)
+aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info,
+ slp_tree node)
{
if ((kind == vector_load
|| kind == unaligned_load
@@ -16545,7 +16676,7 @@ aarch64_ld234_st234_vectors (vect_cost_for_stmt kind, stmt_vec_info stmt_info)
{
stmt_info = DR_GROUP_FIRST_ELEMENT (stmt_info);
if (stmt_info
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_LOAD_STORE_LANES)
+ && vect_mem_access_type (stmt_info, node) == VMAT_LOAD_STORE_LANES)
return DR_GROUP_SIZE (stmt_info);
}
return 0;
@@ -16783,14 +16914,15 @@ aarch64_detect_scalar_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
}
/* STMT_COST is the cost calculated by aarch64_builtin_vectorization_cost
- for the vectorized form of STMT_INFO, which has cost kind KIND and which
- when vectorized would operate on vector type VECTYPE. Try to subdivide
- the target-independent categorization provided by KIND to get a more
- accurate cost. WHERE specifies where the cost associated with KIND
- occurs. */
+ for the vectorized form of STMT_INFO possibly using SLP node NODE, which has
+ cost kind KIND and which when vectorized would operate on vector type
+ VECTYPE. Try to subdivide the target-independent categorization provided by
+ KIND to get a more accurate cost. WHERE specifies where the cost associated
+ with KIND occurs. */
static fractional_cost
aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
- stmt_vec_info stmt_info, tree vectype,
+ stmt_vec_info stmt_info, slp_tree node,
+ tree vectype,
enum vect_cost_model_location where,
fractional_cost stmt_cost)
{
@@ -16816,10 +16948,11 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
cost by the number of elements in the vector. */
if (kind == scalar_load
&& sve_costs
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
{
unsigned int nunits = vect_nunits_for_cost (vectype);
- if (GET_MODE_UNIT_BITSIZE (TYPE_MODE (vectype)) == 64)
+ /* Test for VNx2 modes, which have 64-bit containers. */
+ if (known_eq (GET_MODE_NUNITS (TYPE_MODE (vectype)), aarch64_sve_vg))
return { sve_costs->gather_load_x64_cost, nunits };
return { sve_costs->gather_load_x32_cost, nunits };
}
@@ -16828,7 +16961,7 @@ aarch64_detect_vector_stmt_subtype (vec_info *vinfo, vect_cost_for_stmt kind,
in a scatter operation. */
if (kind == scalar_store
&& sve_costs
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
return sve_costs->scatter_store_elt_cost;
/* Detect cases in which vec_to_scalar represents an in-loop reduction. */
@@ -16952,7 +17085,7 @@ aarch64_sve_adjust_stmt_cost (class vec_info *vinfo, vect_cost_for_stmt kind,
cost of any embedded operations. */
static fractional_cost
aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
- stmt_vec_info stmt_info, tree vectype,
+ stmt_vec_info stmt_info, slp_tree node, tree vectype,
unsigned vec_flags, fractional_cost stmt_cost)
{
if (vectype)
@@ -16961,7 +17094,7 @@ aarch64_adjust_stmt_cost (vec_info *vinfo, vect_cost_for_stmt kind,
/* Detect cases in which a vector load or store represents an
LD[234] or ST[234] instruction. */
- switch (aarch64_ld234_st234_vectors (kind, stmt_info))
+ switch (aarch64_ld234_st234_vectors (kind, stmt_info, node))
{
case 2:
stmt_cost += simd_costs->ld2_st2_permute_cost;
@@ -17033,7 +17166,7 @@ aarch64_force_single_cycle (vec_info *vinfo, stmt_vec_info stmt_info)
information relating to the vector operation in OPS. */
void
aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
- stmt_vec_info stmt_info,
+ stmt_vec_info stmt_info, slp_tree node,
aarch64_vec_op_count *ops)
{
const aarch64_base_vec_issue_info *base_issue = ops->base_issue_info ();
@@ -17131,7 +17264,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
/* Add any extra overhead associated with LD[234] and ST[234] operations. */
if (simd_issue)
- switch (aarch64_ld234_st234_vectors (kind, stmt_info))
+ switch (aarch64_ld234_st234_vectors (kind, stmt_info, node))
{
case 2:
ops->general_ops += simd_issue->ld2_st2_general_ops * count;
@@ -17149,7 +17282,7 @@ aarch64_vector_costs::count_ops (unsigned int count, vect_cost_for_stmt kind,
/* Add any overhead associated with gather loads and scatter stores. */
if (sve_issue
&& (kind == scalar_load || kind == scalar_store)
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
{
unsigned int pairs = CEIL (count, 2);
ops->pred_ops += sve_issue->gather_scatter_pair_pred_ops * pairs;
@@ -17254,7 +17387,7 @@ aarch64_stp_sequence_cost (unsigned int count, vect_cost_for_stmt kind,
unsigned
aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
- stmt_vec_info stmt_info, slp_tree,
+ stmt_vec_info stmt_info, slp_tree node,
tree vectype, int misalign,
vect_cost_model_location where)
{
@@ -17298,18 +17431,21 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
if (vectype && m_vec_flags)
stmt_cost = aarch64_detect_vector_stmt_subtype (m_vinfo, kind,
- stmt_info, vectype,
- where, stmt_cost);
+ stmt_info, node,
+ vectype, where,
+ stmt_cost);
/* Check if we've seen an SVE gather/scatter operation and which size. */
if (kind == scalar_load
&& aarch64_sve_mode_p (TYPE_MODE (vectype))
- && STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER)
+ && vect_mem_access_type (stmt_info, node) == VMAT_GATHER_SCATTER)
{
const sve_vec_cost *sve_costs = aarch64_tune_params.vec_costs->sve;
if (sve_costs)
{
- if (GET_MODE_UNIT_BITSIZE (TYPE_MODE (vectype)) == 64)
+ /* Test for VNx2 modes, which have 64-bit containers. */
+ if (known_eq (GET_MODE_NUNITS (TYPE_MODE (vectype)),
+ aarch64_sve_vg))
m_sve_gather_scatter_init_cost
+= sve_costs->gather_load_x64_init_cost;
else
@@ -17351,7 +17487,7 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
{
/* Account for any extra "embedded" costs that apply additively
to the base cost calculated above. */
- stmt_cost = aarch64_adjust_stmt_cost (m_vinfo, kind, stmt_info,
+ stmt_cost = aarch64_adjust_stmt_cost (m_vinfo, kind, stmt_info, node,
vectype, m_vec_flags, stmt_cost);
/* If we're recording a nonzero vector loop body cost for the
@@ -17362,7 +17498,7 @@ aarch64_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
&& (!LOOP_VINFO_LOOP (loop_vinfo)->inner || in_inner_loop_p)
&& stmt_cost != 0)
for (auto &ops : m_ops)
- count_ops (count, kind, stmt_info, &ops);
+ count_ops (count, kind, stmt_info, node, &ops);
/* If we're applying the SVE vs. Advanced SIMD unrolling heuristic,
estimate the number of statements in the unrolled Advanced SIMD
@@ -17565,6 +17701,19 @@ adjust_body_cost (loop_vec_info loop_vinfo,
dump_printf_loc (MSG_NOTE, vect_location,
"Original vector body cost = %d\n", body_cost);
+ /* If we know we have a single partial vector iteration, cap the VF
+ to the number of scalar iterations for costing purposes. */
+ if (LOOP_VINFO_NITERS_KNOWN_P (loop_vinfo))
+ {
+ auto niters = LOOP_VINFO_INT_NITERS (loop_vinfo);
+ if (niters < estimated_vf && dump_enabled_p ())
+ dump_printf_loc (MSG_NOTE, vect_location,
+ "Scalar loop iterates at most %wd times. Capping VF "
+ " from %d to %wd\n", niters, estimated_vf, niters);
+
+ estimated_vf = MIN (estimated_vf, niters);
+ }
+
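A rough worked example of the cap above, with hypothetical numbers rather than anything taken from the patch: if the loop is known to run NITERS = 7 scalar iterations but the estimated VF is 16, the vector body executes once with a partial vector, so the scalar work it replaces is 7 * min_cycles_per_iter, not 16 * min_cycles_per_iter. Capping estimated_vf at 7 makes the scalar_cycles_per_iter computed just below reflect the cost actually saved instead of overstating it by a factor of 16/7.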
fractional_cost scalar_cycles_per_iter
= scalar_ops.min_cycles_per_iter () * estimated_vf;
@@ -19000,9 +19149,9 @@ static char *
aarch64_offload_options (void)
{
if (TARGET_ILP32)
- return xstrdup ("-foffload-abi=ilp32");
+ return xstrdup ("-foffload-abi=ilp32 -foffload-abi-host-opts=-mabi=ilp32");
else
- return xstrdup ("-foffload-abi=lp64");
+ return xstrdup ("-foffload-abi=lp64 -foffload-abi-host-opts=-mabi=lp64");
}
static struct machine_function *
@@ -20353,6 +20502,10 @@ dispatch_function_versions (tree dispatch_decl,
tree init_fn_id = get_identifier ("__init_cpu_features_resolver");
tree init_fn_decl = build_decl (UNKNOWN_LOCATION, FUNCTION_DECL,
init_fn_id, init_fn_type);
+ DECL_EXTERNAL (init_fn_decl) = 1;
+ TREE_PUBLIC (init_fn_decl) = 1;
+ DECL_VISIBILITY (init_fn_decl) = VISIBILITY_HIDDEN;
+ DECL_VISIBILITY_SPECIFIED (init_fn_decl) = 1;
tree arg1 = DECL_ARGUMENTS (dispatch_decl);
tree arg2 = TREE_CHAIN (arg1);
ifunc_cpu_init_stmt = gimple_build_call (init_fn_decl, 2, arg1, arg2);
@@ -20372,6 +20525,9 @@ dispatch_function_versions (tree dispatch_decl,
get_identifier ("__aarch64_cpu_features"),
global_type);
DECL_EXTERNAL (global_var) = 1;
+ TREE_PUBLIC (global_var) = 1;
+ DECL_VISIBILITY (global_var) = VISIBILITY_HIDDEN;
+ DECL_VISIBILITY_SPECIFIED (global_var) = 1;
tree mask_var = create_tmp_var (long_long_unsigned_type_node);
tree component_expr = build3 (COMPONENT_REF, long_long_unsigned_type_node,
@@ -21075,7 +21231,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
??? It would be possible (but complex) to handle rematerialization
of other constants via secondary reloads. */
if (!GET_MODE_SIZE (mode).is_constant ())
- return aarch64_simd_valid_immediate (x, NULL);
+ return aarch64_simd_valid_mov_imm (x);
/* Otherwise, accept any CONST_VECTOR that, if all else fails, can at
least be forced to memory and loaded from there. */
@@ -22467,6 +22623,10 @@ aarch64_mangle_type (const_tree type)
return "Dh";
}
+ /* Modal 8 bit floating point types. */
+ if (TYPE_MAIN_VARIANT (type) == aarch64_mfp8_type_node)
+ return "u6__mfp8";
+
/* Mangle AArch64-specific internal types. TYPE_NAME is non-NULL_TREE for
builtin types. */
if (TYPE_NAME (type) != NULL)
@@ -22481,6 +22641,29 @@ aarch64_mangle_type (const_tree type)
return NULL;
}
+/* Implement TARGET_INVALID_CONVERSION. */
+
+static const char *
+aarch64_invalid_conversion (const_tree fromtype, const_tree totype)
+{
+ /* Do not allow conversions to/from FP8. But do allow conversions between
+ volatile and const variants of __mfp8. */
+ bool fromtype_is_fp8
+ = (TYPE_MAIN_VARIANT (fromtype) == aarch64_mfp8_type_node);
+ bool totype_is_fp8 = (TYPE_MAIN_VARIANT (totype) == aarch64_mfp8_type_node);
+
+ if (fromtype_is_fp8 && totype_is_fp8)
+ return NULL;
+
+ if (fromtype_is_fp8)
+ return N_ ("invalid conversion from type %<mfloat8_t%>");
+ if (totype_is_fp8)
+ return N_ ("invalid conversion to type %<mfloat8_t%>");
+
+ /* Conversion allowed. */
+ return NULL;
+}
+
/* Implement TARGET_VERIFY_TYPE_CONTEXT. */
static bool
@@ -22788,34 +22971,32 @@ aarch64_advsimd_valid_immediate_hs (unsigned int val32,
return false;
}
-/* Return true if replicating VAL64 is a valid immediate for the
+/* Return true if replicating VAL64 with mode MODE is a valid immediate for the
Advanced SIMD operation described by WHICH. If INFO is nonnull,
use it to describe valid immediates. */
static bool
aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
+ scalar_int_mode mode,
simd_immediate_info *info,
enum simd_immediate_check which)
{
unsigned int val32 = val64 & 0xffffffff;
- unsigned int val16 = val64 & 0xffff;
unsigned int val8 = val64 & 0xff;
- if (val32 == (val64 >> 32))
+ if (mode != DImode)
{
- if ((which & AARCH64_CHECK_ORR) != 0
+ if ((which == AARCH64_CHECK_MOV || which == AARCH64_CHECK_ORR)
&& aarch64_advsimd_valid_immediate_hs (val32, info, which,
simd_immediate_info::MOV))
return true;
- if ((which & AARCH64_CHECK_BIC) != 0
+ if ((which == AARCH64_CHECK_MOV || which == AARCH64_CHECK_AND)
&& aarch64_advsimd_valid_immediate_hs (~val32, info, which,
simd_immediate_info::MVN))
return true;
/* Try using a replicated byte. */
- if (which == AARCH64_CHECK_MOV
- && val16 == (val32 >> 16)
- && val8 == (val16 >> 8))
+ if (which == AARCH64_CHECK_MOV && mode == QImode)
{
if (info)
*info = simd_immediate_info (QImode, val8);
@@ -22843,47 +23024,41 @@ aarch64_advsimd_valid_immediate (unsigned HOST_WIDE_INT val64,
return false;
}
-/* Return true if replicating VAL64 gives a valid immediate for an SVE MOV
- instruction. If INFO is nonnull, use it to describe valid immediates. */
+/* Return true if replicating IVAL with MODE gives a valid immediate for an SVE
+ instruction of the kind described by WHICH. If INFO is nonnull, use it to
+ describe valid immediates. */
static bool
-aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT val64,
- simd_immediate_info *info)
+aarch64_sve_valid_immediate (unsigned HOST_WIDE_INT ival, scalar_int_mode mode,
+ simd_immediate_info *info,
+ enum simd_immediate_check which)
{
- scalar_int_mode mode = DImode;
- unsigned int val32 = val64 & 0xffffffff;
- if (val32 == (val64 >> 32))
+ HOST_WIDE_INT val = trunc_int_for_mode (ival, mode);
+
+ if (which == AARCH64_CHECK_MOV)
{
- mode = SImode;
- unsigned int val16 = val32 & 0xffff;
- if (val16 == (val32 >> 16))
+ if (IN_RANGE (val, -0x80, 0x7f))
{
- mode = HImode;
- unsigned int val8 = val16 & 0xff;
- if (val8 == (val16 >> 8))
- mode = QImode;
+ /* DUP with no shift. */
+ if (info)
+ *info = simd_immediate_info (mode, val,
+ simd_immediate_info::SVE_MOV);
+ return true;
+ }
+ if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
+ {
+ /* DUP with LSL #8. */
+ if (info)
+ *info = simd_immediate_info (mode, val,
+ simd_immediate_info::SVE_MOV);
+ return true;
}
}
- HOST_WIDE_INT val = trunc_int_for_mode (val64, mode);
- if (IN_RANGE (val, -0x80, 0x7f))
- {
- /* DUP with no shift. */
- if (info)
- *info = simd_immediate_info (mode, val);
- return true;
- }
- if ((val & 0xff) == 0 && IN_RANGE (val, -0x8000, 0x7f00))
- {
- /* DUP with LSL #8. */
- if (info)
- *info = simd_immediate_info (mode, val);
- return true;
- }
- if (aarch64_bitmask_imm (val64, mode))
+ if (aarch64_bitmask_imm (ival, mode))
{
/* DUPM. */
if (info)
- *info = simd_immediate_info (mode, val);
+ *info = simd_immediate_info (mode, val, simd_immediate_info::SVE_MOV);
return true;
}
return false;
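As a rough standalone sketch (not GCC code), the two DUP ranges tested above are: a plain signed 8-bit immediate, or a multiple of 256 whose shifted value fits in a signed 8-bit immediate (DUP ..., LSL #8). Anything else must qualify as a DUPM bitmask immediate, which this sketch deliberately omits.

    #include <cstdint>
    #include <cstdio>

    static bool sve_dup_imm_p (int64_t val)
    {
      /* DUP Zd.T, #imm: signed 8-bit immediate, no shift.  */
      if (val >= -0x80 && val <= 0x7f)
        return true;
      /* DUP Zd.T, #imm, LSL #8: low byte clear, shifted value fits in 8 bits.  */
      if ((val & 0xff) == 0 && val >= -0x8000 && val <= 0x7f00)
        return true;
      return false;
    }

    int main ()
    {
      printf ("127    -> %d\n", sve_dup_imm_p (127));     /* 1: DUP, no shift.  */
      printf ("0x5500 -> %d\n", sve_dup_imm_p (0x5500));  /* 1: DUP, LSL #8.  */
      printf ("0x5501 -> %d\n", sve_dup_imm_p (0x5501));  /* 0: needs DUPM or a load.  */
      return 0;
    }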
@@ -22960,12 +23135,97 @@ aarch64_sve_pred_valid_immediate (rtx x, simd_immediate_info *info)
return false;
}
+/* We can only represent floating point constants which will fit in
+ "quarter-precision" values. These values are characterised by
+ a sign bit, a 4-bit mantissa and a 3-bit exponent, and are given
+ by:
+
+ (-1)^s * (n/16) * 2^r
+
+ Where:
+ 's' is the sign bit.
+ 'n' is an integer in the range 16 <= n <= 31.
+ 'r' is an integer in the range -3 <= r <= 4.
+
+ Return true iff R represents a value encodable into an AArch64 floating point
+ move instruction as an immediate. Otherwise return false. */
+
+static bool
+aarch64_real_float_const_representable_p (REAL_VALUE_TYPE r)
+{
+ /* This represents our current view of how many bits
+ make up the mantissa. */
+ int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
+ int exponent;
+ unsigned HOST_WIDE_INT mantissa, mask;
+ REAL_VALUE_TYPE m;
+ bool fail = false;
+
+ /* We cannot represent infinities, NaNs or +/-zero. We won't
+ know if we have +zero until we analyse the mantissa, but we
+ can reject the other invalid values. */
+ if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
+ || REAL_VALUE_MINUS_ZERO (r))
+ return false;
+
+ /* Extract exponent. */
+ r = real_value_abs (&r);
+ exponent = REAL_EXP (&r);
+
+ /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
+ highest (sign) bit, with a fixed binary point at bit point_pos.
+ m1 holds the low part of the mantissa, m2 the high part.
+ WARNING: If we ever have a representation using more than 2 * H_W_I - 1
+ bits for the mantissa, this can fail (low bits will be lost). */
+ real_ldexp (&m, &r, point_pos - exponent);
+ wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
+
+ /* If the low part of the mantissa has bits set we cannot represent
+ the value. */
+ if (fail || w.ulow () != 0)
+ return false;
+
+ /* We have rejected the lower HOST_WIDE_INT, so update our
+ understanding of how many bits lie in the mantissa and
+ look only at the high HOST_WIDE_INT. */
+ mantissa = w.elt (1);
+ point_pos -= HOST_BITS_PER_WIDE_INT;
+
+ /* We can only represent values with a mantissa of the form 1.xxxx. */
+ mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
+ if ((mantissa & mask) != 0)
+ return false;
+
+ /* Having filtered unrepresentable values, we may now remove all
+ but the highest 5 bits. */
+ mantissa >>= point_pos - 5;
+
+ /* We cannot represent the value 0.0, so reject it. This is handled
+ elsewhere. */
+ if (mantissa == 0)
+ return false;
+
+ /* Then, as bit 4 is always set, we can mask it off, leaving
+ the mantissa in the range [0, 15]. */
+ mantissa &= ~(1 << 4);
+ gcc_assert (mantissa <= 15);
+
+ /* GCC internally does not use IEEE754-like encoding (where normalized
+ significands are in the range [1, 2). GCC uses [0.5, 1) (see real.cc).
+ Our mantissa values are shifted 4 places to the left relative to
+ normalized IEEE754 so we must modify the exponent returned by REAL_EXP
+ by 5 places to correct for GCC's representation. */
+ exponent = 5 - exponent;
+
+ return (exponent >= 0 && exponent <= 7);
+}
+
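As a standalone cross-check (not GCC code), the set described by the comment above can also be enumerated directly: values of the form (-1)^s * (n/16) * 2^r with 16 <= n <= 31 and -3 <= r <= 4. The brute-force test below accepts exactly those values, which is what the mantissa/exponent bit tests in aarch64_real_float_const_representable_p establish without iterating.

    #include <cmath>
    #include <cstdio>

    static bool fmov_imm_representable_p (double x)
    {
      for (int s = 0; s <= 1; ++s)
        for (int n = 16; n <= 31; ++n)
          for (int r = -3; r <= 4; ++r)
            if (x == (s ? -1.0 : 1.0) * (n / 16.0) * std::ldexp (1.0, r))
              return true;
      return false;
    }

    int main ()
    {
      printf ("0.5   -> %d\n", fmov_imm_representable_p (0.5));   /* 1: n=16, r=-1.  */
      printf ("-31.0 -> %d\n", fmov_imm_representable_p (-31.0)); /* 1: n=31, r=4.  */
      printf ("0.1   -> %d\n", fmov_imm_representable_p (0.1));   /* 0: not of that form.  */
      printf ("0.0   -> %d\n", fmov_imm_representable_p (0.0));   /* 0: zero is excluded.  */
      return 0;
    }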
/* Return true if OP is a valid SIMD immediate for the operation
described by WHICH. If INFO is nonnull, use it to describe valid
immediates. */
-bool
-aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
- enum simd_immediate_check which)
+static bool
+aarch64_simd_valid_imm (rtx op, simd_immediate_info *info,
+ enum simd_immediate_check which)
{
machine_mode mode = GET_MODE (op);
unsigned int vec_flags = aarch64_classify_vector_mode (mode);
@@ -22987,7 +23247,8 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
if (CONST_VECTOR_P (op)
&& CONST_VECTOR_DUPLICATE_P (op))
n_elts = CONST_VECTOR_NPATTERNS (op);
- else if ((vec_flags & VEC_SVE_DATA)
+ else if (which == AARCH64_CHECK_MOV
+ && TARGET_SVE
&& const_vec_series_p (op, &base, &step))
{
gcc_assert (GET_MODE_CLASS (mode) == MODE_VECTOR_INT);
@@ -23012,20 +23273,6 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
else
return false;
- scalar_float_mode elt_float_mode;
- if (n_elts == 1
- && is_a <scalar_float_mode> (elt_mode, &elt_float_mode))
- {
- rtx elt = CONST_VECTOR_ENCODED_ELT (op, 0);
- if (aarch64_float_const_zero_rtx_p (elt)
- || aarch64_float_const_representable_p (elt))
- {
- if (info)
- *info = simd_immediate_info (elt_float_mode, elt);
- return true;
- }
- }
-
/* If all elements in an SVE vector have the same value, we have a free
choice between using the element mode and using the container mode.
Using the element mode means that unused parts of the vector are
@@ -23087,10 +23334,90 @@ aarch64_simd_valid_immediate (rtx op, simd_immediate_info *info,
val64 |= ((unsigned HOST_WIDE_INT) bytes[i % nbytes]
<< (i * BITS_PER_UNIT));
+ /* Try encoding the integer immediate as a floating point value if it's an
+ exact value. */
+ scalar_float_mode fmode = DFmode;
+ scalar_int_mode imode = DImode;
+ unsigned HOST_WIDE_INT ival = val64;
+ unsigned int val32 = val64 & 0xffffffff;
+ if (val32 == (val64 >> 32))
+ {
+ fmode = SFmode;
+ imode = SImode;
+ ival = val32;
+ unsigned int val16 = val32 & 0xffff;
+ if (val16 == (val32 >> 16))
+ {
+ fmode = HFmode;
+ imode = HImode;
+ ival = val16;
+ unsigned int val8 = val16 & 0xff;
+ if (val8 == (val16 >> 8))
+ {
+ imode = QImode;
+ ival = val8;
+ }
+ }
+ }
+
+ if (which == AARCH64_CHECK_MOV
+ && imode != QImode
+ && (imode != HImode || TARGET_FP_F16INST))
+ {
+ long int as_long_ints[2];
+ as_long_ints[0] = ival & 0xFFFFFFFF;
+ as_long_ints[1] = (ival >> 32) & 0xFFFFFFFF;
+
+ REAL_VALUE_TYPE r;
+ real_from_target (&r, as_long_ints, fmode);
+ if (aarch64_real_float_const_representable_p (r))
+ {
+ if (info)
+ {
+ rtx float_val = const_double_from_real_value (r, fmode);
+ *info = simd_immediate_info (fmode, float_val);
+ }
+ return true;
+ }
+ }
+
if (vec_flags & VEC_SVE_DATA)
- return aarch64_sve_valid_immediate (val64, info);
- else
- return aarch64_advsimd_valid_immediate (val64, info, which);
+ return aarch64_sve_valid_immediate (ival, imode, info, which);
+
+ if (aarch64_advsimd_valid_immediate (val64, imode, info, which))
+ return true;
+
+ if (TARGET_SVE)
+ return aarch64_sve_valid_immediate (ival, imode, info, which);
+ return false;
+}
+
+/* Return true if OP is a valid SIMD move immediate for SVE or AdvSIMD. */
+bool
+aarch64_simd_valid_mov_imm (rtx op)
+{
+ return aarch64_simd_valid_imm (op, NULL, AARCH64_CHECK_MOV);
+}
+
+/* Return true if OP is a valid SIMD orr immediate for SVE or AdvSIMD. */
+bool
+aarch64_simd_valid_orr_imm (rtx op)
+{
+ return aarch64_simd_valid_imm (op, NULL, AARCH64_CHECK_ORR);
+}
+
+/* Return true if OP is a valid SIMD and immediate for SVE or AdvSIMD. */
+bool
+aarch64_simd_valid_and_imm (rtx op)
+{
+ return aarch64_simd_valid_imm (op, NULL, AARCH64_CHECK_AND);
+}
+
+/* Return true if OP is a valid SIMD xor immediate for SVE. */
+bool
+aarch64_simd_valid_xor_imm (rtx op)
+{
+ return aarch64_simd_valid_imm (op, NULL, AARCH64_CHECK_XOR);
}
/* Check whether X is a VEC_SERIES-like constant that starts at 0 and
@@ -23156,7 +23483,7 @@ aarch64_mov_operand_p (rtx x, machine_mode mode)
&& GET_MODE (x) != VNx16BImode)
return false;
- return aarch64_simd_valid_immediate (x, NULL);
+ return aarch64_simd_valid_mov_imm (x);
}
/* Remove UNSPEC_SALT_ADDR before checking symbol reference. */
@@ -23257,7 +23584,7 @@ aarch64_simd_scalar_immediate_valid_for_move (rtx op, scalar_int_mode mode)
vmode = aarch64_simd_container_mode (mode, 64);
rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
- return aarch64_simd_valid_immediate (op_v, NULL);
+ return aarch64_simd_valid_mov_imm (op_v);
}
/* Construct and return a PARALLEL RTX vector with elements numbering the
@@ -23737,7 +24064,7 @@ aarch64_simd_make_constant (rtx vals)
gcc_unreachable ();
if (const_vec != NULL_RTX
- && aarch64_simd_valid_immediate (const_vec, NULL))
+ && aarch64_simd_valid_mov_imm (const_vec))
/* Load using MOVI/MVNI. */
return const_vec;
else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
@@ -23942,7 +24269,7 @@ aarch64_expand_vector_init_fallback (rtx target, rtx vals)
/* Load constant part of vector. We really don't care what goes into the
parts we will overwrite, but we're more likely to be able to load the
constant efficiently if it has fewer, larger, repeating parts
- (see aarch64_simd_valid_immediate). */
+ (see aarch64_simd_valid_imm). */
for (int i = 0; i < n_elts; i++)
{
rtx x = XVECEXP (vals, 0, i);
@@ -25093,109 +25420,31 @@ aarch64_c_mode_for_suffix (char suffix)
return VOIDmode;
}
-/* We can only represent floating point constants which will fit in
- "quarter-precision" values. These values are characterised by
- a sign bit, a 4-bit mantissa and a 3-bit exponent. And are given
- by:
-
- (-1)^s * (n/16) * 2^r
-
- Where:
- 's' is the sign bit.
- 'n' is an integer in the range 16 <= n <= 31.
- 'r' is an integer in the range -3 <= r <= 4. */
-
-/* Return true iff X can be represented by a quarter-precision
+/* Return true iff X with mode MODE can be represented by a quarter-precision
floating point immediate operand X. Note, we cannot represent 0.0. */
+
bool
aarch64_float_const_representable_p (rtx x)
{
- /* This represents our current view of how many bits
- make up the mantissa. */
- int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
- int exponent;
- unsigned HOST_WIDE_INT mantissa, mask;
- REAL_VALUE_TYPE r, m;
- bool fail;
-
x = unwrap_const_vec_duplicate (x);
+ machine_mode mode = GET_MODE (x);
if (!CONST_DOUBLE_P (x))
return false;
- if (GET_MODE (x) == VOIDmode
- || (GET_MODE (x) == HFmode && !TARGET_FP_F16INST))
- return false;
-
- r = *CONST_DOUBLE_REAL_VALUE (x);
-
- /* We cannot represent infinities, NaNs or +/-zero. We won't
- know if we have +zero until we analyse the mantissa, but we
- can reject the other invalid values. */
- if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r)
- || REAL_VALUE_MINUS_ZERO (r))
- return false;
-
- /* For BFmode, only handle 0.0. */
- if (GET_MODE (x) == BFmode)
- return real_iszero (&r, false);
-
- /* Extract exponent. */
- r = real_value_abs (&r);
- exponent = REAL_EXP (&r);
-
- /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
- highest (sign) bit, with a fixed binary point at bit point_pos.
- m1 holds the low part of the mantissa, m2 the high part.
- WARNING: If we ever have a representation using more than 2 * H_W_I - 1
- bits for the mantissa, this can fail (low bits will be lost). */
- real_ldexp (&m, &r, point_pos - exponent);
- wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
-
- /* If the low part of the mantissa has bits set we cannot represent
- the value. */
- if (w.ulow () != 0)
- return false;
- /* We have rejected the lower HOST_WIDE_INT, so update our
- understanding of how many bits lie in the mantissa and
- look only at the high HOST_WIDE_INT. */
- mantissa = w.elt (1);
- point_pos -= HOST_BITS_PER_WIDE_INT;
-
- /* We can only represent values with a mantissa of the form 1.xxxx. */
- mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
- if ((mantissa & mask) != 0)
+ if ((mode == HFmode && !TARGET_FP_F16INST)
+ || mode == BFmode)
return false;
- /* Having filtered unrepresentable values, we may now remove all
- but the highest 5 bits. */
- mantissa >>= point_pos - 5;
+ REAL_VALUE_TYPE r = *CONST_DOUBLE_REAL_VALUE (x);
- /* We cannot represent the value 0.0, so reject it. This is handled
- elsewhere. */
- if (mantissa == 0)
- return false;
-
- /* Then, as bit 4 is always set, we can mask it off, leaving
- the mantissa in the range [0, 15]. */
- mantissa &= ~(1 << 4);
- gcc_assert (mantissa <= 15);
-
- /* GCC internally does not use IEEE754-like encoding (where normalized
- significands are in the range [1, 2). GCC uses [0.5, 1) (see real.cc).
- Our mantissa values are shifted 4 places to the left relative to
- normalized IEEE754 so we must modify the exponent returned by REAL_EXP
- by 5 places to correct for GCC's representation. */
- exponent = 5 - exponent;
-
- return (exponent >= 0 && exponent <= 7);
+ return aarch64_real_float_const_representable_p (r);
}
-/* Returns the string with the instruction for AdvSIMD MOVI, MVNI, ORR or BIC
- immediate with a CONST_VECTOR of MODE and WIDTH. WHICH selects whether to
- output MOVI/MVNI, ORR or BIC immediate. */
+/* Returns the string with the instruction for the SIMD immediate
+ CONST_VECTOR of MODE and WIDTH. WHICH selects between a move (MOVI/MVNI or
+ SVE MOV/INDEX), orr, and/bic, or eor immediate. */
char*
-aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
- enum simd_immediate_check which)
+aarch64_output_simd_imm (rtx const_vector, unsigned width,
+ enum simd_immediate_check which)
{
bool is_valid;
static char templ[40];
@@ -25206,11 +25455,7 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
struct simd_immediate_info info;
- /* This will return true to show const_vector is legal for use as either
- a AdvSIMD MOVI instruction (or, implicitly, MVNI), ORR or BIC immediate.
- It will also update INFO to show how the immediate should be generated.
- WHICH selects whether to check for MOVI/MVNI, ORR or BIC. */
- is_valid = aarch64_simd_valid_immediate (const_vector, &info, which);
+ is_valid = aarch64_simd_valid_imm (const_vector, &info, which);
gcc_assert (is_valid);
element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
@@ -25245,6 +25490,24 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
if (which == AARCH64_CHECK_MOV)
{
+ if (info.insn == simd_immediate_info::INDEX)
+ {
+ gcc_assert (TARGET_SVE);
+ snprintf (templ, sizeof (templ), "index\t%%Z0.%c, #"
+ HOST_WIDE_INT_PRINT_DEC ", #" HOST_WIDE_INT_PRINT_DEC,
+ element_char, INTVAL (info.u.index.base),
+ INTVAL (info.u.index.step));
+ return templ;
+ }
+
+ if (info.insn == simd_immediate_info::SVE_MOV)
+ {
+ gcc_assert (TARGET_SVE);
+ snprintf (templ, sizeof (templ), "mov\t%%Z0.%c, #" HOST_WIDE_INT_PRINT_DEC,
+ element_char, INTVAL (info.u.mov.value));
+ return templ;
+ }
+
mnemonic = info.insn == simd_immediate_info::MVN ? "mvni" : "movi";
shift_op = (info.u.mov.modifier == simd_immediate_info::MSL
? "msl" : "lsl");
@@ -25263,9 +25526,21 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
}
else
{
- /* For AARCH64_CHECK_BIC and AARCH64_CHECK_ORR. */
- mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "orr";
- if (info.u.mov.shift)
+ /* AARCH64_CHECK_ORR, AARCH64_CHECK_AND or AARCH64_CHECK_XOR. */
+ mnemonic = "orr";
+ if (which == AARCH64_CHECK_AND)
+ mnemonic = info.insn == simd_immediate_info::MVN ? "bic" : "and";
+ else if (which == AARCH64_CHECK_XOR)
+ mnemonic = "eor";
+
+ if (info.insn == simd_immediate_info::SVE_MOV)
+ {
+ gcc_assert (TARGET_SVE);
+ snprintf (templ, sizeof (templ), "%s\t%%Z0.%c, %%Z0.%c, "
+ HOST_WIDE_INT_PRINT_DEC, mnemonic, element_char,
+ element_char, INTVAL (info.u.mov.value));
+ }
+ else if (info.u.mov.shift)
snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, #"
HOST_WIDE_INT_PRINT_DEC ", %s #%d", mnemonic, lane_count,
element_char, UINTVAL (info.u.mov.value), "lsl",
@@ -25278,6 +25553,38 @@ aarch64_output_simd_mov_immediate (rtx const_vector, unsigned width,
return templ;
}
+/* Returns the string with the ORR instruction for the SIMD immediate
+ CONST_VECTOR of WIDTH bits. */
+char*
+aarch64_output_simd_orr_imm (rtx const_vector, unsigned width)
+{
+ return aarch64_output_simd_imm (const_vector, width, AARCH64_CHECK_ORR);
+}
+
+/* Returns the string with the AND/BIC instruction for the SIMD immediate
+ CONST_VECTOR of WIDTH bits. */
+char*
+aarch64_output_simd_and_imm (rtx const_vector, unsigned width)
+{
+ return aarch64_output_simd_imm (const_vector, width, AARCH64_CHECK_AND);
+}
+
+/* Returns the string with the EOR instruction for the SIMD immediate
+ CONST_VECTOR of WIDTH bits. */
+char*
+aarch64_output_simd_xor_imm (rtx const_vector, unsigned width)
+{
+ return aarch64_output_simd_imm (const_vector, width, AARCH64_CHECK_XOR);
+}
+
+/* Returns the string with the MOV instruction for the SIMD immediate
+ CONST_VECTOR of WIDTH bits. */
+char*
+aarch64_output_simd_mov_imm (rtx const_vector, unsigned width)
+{
+ return aarch64_output_simd_imm (const_vector, width, AARCH64_CHECK_MOV);
+}
+
char*
aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
{
@@ -25299,7 +25606,7 @@ aarch64_output_scalar_simd_mov_immediate (rtx immediate, scalar_int_mode mode)
vmode = aarch64_simd_container_mode (mode, width);
rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
- return aarch64_output_simd_mov_immediate (v_op, width);
+ return aarch64_output_simd_mov_imm (v_op, width);
}
/* Return the output string to use for moving immediate CONST_VECTOR
@@ -25311,8 +25618,9 @@ aarch64_output_sve_mov_immediate (rtx const_vector)
static char templ[40];
struct simd_immediate_info info;
char element_char;
+ bool is_valid;
- bool is_valid = aarch64_simd_valid_immediate (const_vector, &info);
+ is_valid = aarch64_simd_valid_imm (const_vector, &info, AARCH64_CHECK_MOV);
gcc_assert (is_valid);
element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
@@ -25368,8 +25676,11 @@ aarch64_output_sve_mov_immediate (rtx const_vector)
}
}
- snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
- element_char, INTVAL (info.u.mov.value));
+ if (info.u.mov.value == const0_rtx && TARGET_NON_STREAMING)
+ snprintf (templ, sizeof (templ), "movi\t%%d0, #0");
+ else
+ snprintf (templ, sizeof (templ), "mov\t%%0.%c, #" HOST_WIDE_INT_PRINT_DEC,
+ element_char, INTVAL (info.u.mov.value));
return templ;
}
@@ -25381,9 +25692,10 @@ char *
aarch64_output_sve_ptrues (rtx const_unspec)
{
static char templ[40];
-
struct simd_immediate_info info;
- bool is_valid = aarch64_simd_valid_immediate (const_unspec, &info);
+ bool is_valid;
+
+ is_valid = aarch64_simd_valid_imm (const_unspec, &info, AARCH64_CHECK_MOV);
gcc_assert (is_valid && info.insn == simd_immediate_info::PTRUE);
char element_char = sizetochar (GET_MODE_BITSIZE (info.elt_mode));
@@ -25653,26 +25965,23 @@ aarch64_evpc_reencode (struct expand_vec_perm_d *d)
{
expand_vec_perm_d newd;
- if (d->vec_flags != VEC_ADVSIMD)
+ /* The subregs that we'd create are not supported for big-endian SVE;
+ see aarch64_modes_compatible_p for details. */
+ if (BYTES_BIG_ENDIAN && (d->vec_flags & VEC_ANY_SVE))
return false;
/* Get the new mode. Always twice the size of the inner
and half the elements. */
- poly_uint64 vec_bits = GET_MODE_BITSIZE (d->vmode);
- unsigned int new_elt_bits = GET_MODE_UNIT_BITSIZE (d->vmode) * 2;
- auto new_elt_mode = int_mode_for_size (new_elt_bits, false).require ();
- machine_mode new_mode = aarch64_simd_container_mode (new_elt_mode, vec_bits);
-
- if (new_mode == word_mode)
+ machine_mode new_mode;
+ if (!aarch64_coalesce_units (d->vmode, 2).exists (&new_mode))
return false;
vec_perm_indices newpermindices;
-
if (!newpermindices.new_shrunk_vector (d->perm, 2))
return false;
newd.vmode = new_mode;
- newd.vec_flags = VEC_ADVSIMD;
+ newd.vec_flags = d->vec_flags;
newd.op_mode = newd.vmode;
newd.op_vec_flags = newd.vec_flags;
newd.target = d->target ? gen_lowpart (new_mode, d->target) : NULL;
@@ -27081,6 +27390,9 @@ aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
if (op_mode == VOIDmode)
op_mode = GET_MODE (op1);
+ if (CONST_SCALAR_INT_P (op1))
+ canonicalize_comparison (op_mode, &code, &op1);
+
switch (op_mode)
{
case E_QImode:
@@ -27097,13 +27409,13 @@ aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
case E_SFmode:
cmp_mode = SFmode;
- cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
+ cc_mode = aarch64_select_cc_mode (code, op0, op1);
icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpesf : CODE_FOR_fcmpsf;
break;
case E_DFmode:
cmp_mode = DFmode;
- cc_mode = aarch64_select_cc_mode ((rtx_code) code, op0, op1);
+ cc_mode = aarch64_select_cc_mode (code, op0, op1);
icode = cc_mode == CCFPEmode ? CODE_FOR_fcmpedf : CODE_FOR_fcmpdf;
break;
@@ -27134,7 +27446,7 @@ aarch64_gen_ccmp_first (rtx_insn **prep_seq, rtx_insn **gen_seq,
*gen_seq = get_insns ();
end_sequence ();
- return gen_rtx_fmt_ee ((rtx_code) code, cc_mode,
+ return gen_rtx_fmt_ee (code, cc_mode,
gen_rtx_REG (cc_mode, CC_REGNUM), const0_rtx);
}
@@ -27157,6 +27469,9 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
if (op_mode == VOIDmode)
op_mode = GET_MODE (op1);
+ if (CONST_SCALAR_INT_P (op1))
+ canonicalize_comparison (op_mode, &cmp_code, &op1);
+
switch (op_mode)
{
case E_QImode:
@@ -27171,12 +27486,12 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
case E_SFmode:
cmp_mode = SFmode;
- cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
+ cc_mode = aarch64_select_cc_mode (cmp_code, op0, op1);
break;
case E_DFmode:
cmp_mode = DFmode;
- cc_mode = aarch64_select_cc_mode ((rtx_code) cmp_code, op0, op1);
+ cc_mode = aarch64_select_cc_mode (cmp_code, op0, op1);
break;
default:
@@ -27197,7 +27512,7 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
end_sequence ();
target = gen_rtx_REG (cc_mode, CC_REGNUM);
- aarch64_cond = aarch64_get_condition_code_1 (cc_mode, (rtx_code) cmp_code);
+ aarch64_cond = aarch64_get_condition_code_1 (cc_mode, cmp_code);
if (bit_code != AND)
{
@@ -27236,7 +27551,7 @@ aarch64_gen_ccmp_next (rtx_insn **prep_seq, rtx_insn **gen_seq, rtx prev,
*gen_seq = get_insns ();
end_sequence ();
- return gen_rtx_fmt_ee ((rtx_code) cmp_code, VOIDmode, target, const0_rtx);
+ return gen_rtx_fmt_ee (cmp_code, VOIDmode, target, const0_rtx);
}
#undef TARGET_GEN_CCMP_FIRST
@@ -29020,8 +29335,20 @@ aarch64_stack_protect_guard (void)
return NULL_TREE;
}
-/* Return the diagnostic message string if the binary operation OP is
- not permitted on TYPE1 and TYPE2, NULL otherwise. */
+/* Implement TARGET_INVALID_UNARY_OP. */
+
+static const char *
+aarch64_invalid_unary_op (int op, const_tree type)
+{
+ /* Reject all single-operand operations on __mfp8 except for &. */
+ if (TYPE_MAIN_VARIANT (type) == aarch64_mfp8_type_node && op != ADDR_EXPR)
+ return N_ ("operation not permitted on type %<mfloat8_t%>");
+
+ /* Operation allowed. */
+ return NULL;
+}
+
+/* Implement TARGET_INVALID_BINARY_OP. */
static const char *
aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
@@ -29035,6 +29362,11 @@ aarch64_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1,
!= aarch64_sve::builtin_type_p (type2)))
return N_("cannot combine GNU and SVE vectors in a binary operation");
+ /* Reject all 2-operand operations on __mfp8. */
+ if (TYPE_MAIN_VARIANT (type1) == aarch64_mfp8_type_node
+ || TYPE_MAIN_VARIANT (type2) == aarch64_mfp8_type_node)
+ return N_ ("operation not permitted on type %<mfloat8_t%>");
+
/* Operation allowed. */
return NULL;
}
@@ -30752,6 +31084,12 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_MANGLE_TYPE
#define TARGET_MANGLE_TYPE aarch64_mangle_type
+#undef TARGET_INVALID_CONVERSION
+#define TARGET_INVALID_CONVERSION aarch64_invalid_conversion
+
+#undef TARGET_INVALID_UNARY_OP
+#define TARGET_INVALID_UNARY_OP aarch64_invalid_unary_op
+
#undef TARGET_INVALID_BINARY_OP
#define TARGET_INVALID_BINARY_OP aarch64_invalid_binary_op
@@ -30811,6 +31149,12 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_DWARF_FRAME_REG_MODE
#define TARGET_DWARF_FRAME_REG_MODE aarch64_dwarf_frame_reg_mode
+#undef TARGET_OUTPUT_CFI_DIRECTIVE
+#define TARGET_OUTPUT_CFI_DIRECTIVE aarch64_output_cfi_directive
+
+#undef TARGET_DW_CFI_OPRND1_DESC
+#define TARGET_DW_CFI_OPRND1_DESC aarch64_dw_cfi_oprnd1_desc
+
#undef TARGET_PROMOTED_TYPE
#define TARGET_PROMOTED_TYPE aarch64_promoted_type
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 2dfb999..593319f 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -96,6 +96,8 @@
#define LONG_LONG_TYPE_SIZE 64
+#define WIDEST_HARDWARE_FP_SIZE 64
+
/* This value is the amount of bytes a caller is allowed to drop the stack
before probing has to be done for stack clash protection. */
#define STACK_CLASH_CALLER_GUARD 1024
@@ -156,6 +158,16 @@
#define PCC_BITFIELD_TYPE_MATTERS 1
+/* Use the same RTL truth representation for vector elements as we do
+ for scalars. This maintains the property that a comparison like
+ eq:V4SI is a composition of 4 individual eq:SIs, just like plus:V4SI
+ is a composition of 4 individual plus:SIs.
+
+ This means that Advanced SIMD comparisons are represented in RTL as
+ (neg (op ...)). */
+
+#define VECTOR_STORE_FLAG_VALUE(MODE) CONST1_RTX (GET_MODE_INNER (MODE))
+
#ifndef USED_FOR_TARGET
/* Define an enum of all features (ISA modes, architectures and extensions).
@@ -457,6 +469,11 @@ constexpr auto AARCH64_FL_DEFAULT_ISA_MODE ATTRIBUTE_UNUSED
enabled through +gcs. */
#define TARGET_GCS AARCH64_HAVE_ISA (GCS)
+/* Floating Point Absolute Maximum/Minimum extension instructions are
+ enabled through +faminmax. */
+#define TARGET_FAMINMAX AARCH64_HAVE_ISA (FAMINMAX)
+#define TARGET_SVE_FAMINMAX (TARGET_SVE && TARGET_FAMINMAX)
+
/* Prefer different predicate registers for the output of a predicated
operation over re-using an existing input predicate. */
#define TARGET_SVE_PRED_CLOBBER (TARGET_SVE \
@@ -1447,6 +1464,11 @@ extern const char *aarch64_rewrite_mcpu (int argc, const char **argv);
#define ASM_OUTPUT_POOL_EPILOGUE aarch64_asm_output_pool_epilogue
+/* This type is the user-visible __mfp8, and a pointer to that type. We
+ need it in many places in the backend. Defined in aarch64-builtins.cc. */
+extern GTY(()) tree aarch64_mfp8_type_node;
+extern GTY(()) tree aarch64_mfp8_ptr_type_node;
+
/* This type is the user-visible __fp16, and a pointer to that type. We
need it in many places in the backend. Defined in aarch64-builtins.cc. */
extern GTY(()) tree aarch64_fp16_type_node;
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index c54b29c..20956fc 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -5345,6 +5345,15 @@
(popcount:ALLI (match_operand:ALLI 1 "register_operand")))]
"TARGET_CSSC ? GET_MODE_BITSIZE (<MODE>mode) >= 32 : TARGET_SIMD"
{
+ if (!TARGET_CSSC && TARGET_SVE && <MODE>mode != QImode)
+ {
+ rtx tmp = gen_reg_rtx (<VEC_POP_MODE>mode);
+ rtx op1 = gen_lowpart (<VEC_POP_MODE>mode, operands[1]);
+ emit_insn (gen_popcount<vec_pop_mode>2 (tmp, op1));
+ emit_move_insn (operands[0], gen_lowpart (<MODE>mode, tmp));
+ DONE;
+ }
+
if (!TARGET_CSSC)
{
rtx v = gen_reg_rtx (V8QImode);
@@ -7218,13 +7227,12 @@
}
)
-;; For copysign (x, y), we want to generate:
+;; For copysignf (x, y), we want to generate:
;;
-;; LDR d2, #(1 << 63)
-;; BSL v2.8b, [y], [x]
+;; movi v31.4s, 0x80, lsl 24
+;; bit v0.16b, v1.16b, v31.16b
;;
-;; or another, equivalent, sequence using one of BSL/BIT/BIF. Because
-;; we expect these operations to nearly always operate on
+;; Because we expect these operations to nearly always operate on
;; floating-point values, we do not want the operation to be
;; simplified into a bit-field insert operation that operates on the
;; integer side, since typically that would involve three inter-bank
@@ -7239,32 +7247,25 @@
(match_operand:GPF 2 "nonmemory_operand")]
"TARGET_SIMD"
{
- rtx signbit_const = GEN_INT (HOST_WIDE_INT_M1U
- << (GET_MODE_BITSIZE (<MODE>mode) - 1));
- /* copysign (x, -1) should instead be expanded as orr with the sign
- bit. */
+ rtx sign = GEN_INT (HOST_WIDE_INT_M1U << (GET_MODE_BITSIZE (<MODE>mode) - 1));
+ rtx v_bitmask = gen_const_vec_duplicate (<VQ_INT_EQUIV>mode, sign);
+ v_bitmask = force_reg (<VQ_INT_EQUIV>mode, v_bitmask);
+
+ /* copysign (x, -1) should instead be expanded as orr with the signbit. */
rtx op2_elt = unwrap_const_vec_duplicate (operands[2]);
+
if (GET_CODE (op2_elt) == CONST_DOUBLE
&& real_isneg (CONST_DOUBLE_REAL_VALUE (op2_elt)))
{
- rtx v_bitmask
- = force_reg (V2<V_INT_EQUIV>mode,
- gen_const_vec_duplicate (V2<V_INT_EQUIV>mode,
- signbit_const));
-
- emit_insn (gen_iorv2<v_int_equiv>3 (
- lowpart_subreg (V2<V_INT_EQUIV>mode, operands[0], <MODE>mode),
- lowpart_subreg (V2<V_INT_EQUIV>mode, operands[1], <MODE>mode),
+ emit_insn (gen_ior<vq_int_equiv>3 (
+ lowpart_subreg (<VQ_INT_EQUIV>mode, operands[0], <MODE>mode),
+ lowpart_subreg (<VQ_INT_EQUIV>mode, operands[1], <MODE>mode),
v_bitmask));
DONE;
}
-
- machine_mode int_mode = <V_INT_EQUIV>mode;
- rtx bitmask = gen_reg_rtx (int_mode);
- emit_move_insn (bitmask, signbit_const);
operands[2] = force_reg (<MODE>mode, operands[2]);
emit_insn (gen_copysign<mode>3_insn (operands[0], operands[1], operands[2],
- bitmask));
+ v_bitmask));
DONE;
}
)
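As a small standalone sketch (not part of the patch), the identity the expander relies on is copysign (x, y) = (x & ~M) | (y & M) with M the sign-bit mask; the movi above materialises M, and BSL/BIT/BIF perform the masked insert in a single instruction.

    #include <cassert>
    #include <cmath>
    #include <cstdint>
    #include <cstring>

    static float copysignf_bits (float x, float y)
    {
      uint32_t xb, yb;
      std::memcpy (&xb, &x, sizeof xb);
      std::memcpy (&yb, &y, sizeof yb);
      const uint32_t m = 0x80000000u;       /* movi v31.4s, 0x80, lsl 24.  */
      uint32_t rb = (xb & ~m) | (yb & m);   /* bit: insert y's sign into x.  */
      float r;
      std::memcpy (&r, &rb, sizeof r);
      return r;
    }

    int main ()
    {
      assert (copysignf_bits (3.0f, -1.0f) == std::copysign (3.0f, -1.0f));
      assert (copysignf_bits (-2.5f, 4.0f) == std::copysign (-2.5f, 4.0f));
      return 0;
    }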
@@ -7273,23 +7274,21 @@
[(set (match_operand:GPF 0 "register_operand")
(unspec:GPF [(match_operand:GPF 1 "register_operand")
(match_operand:GPF 2 "register_operand")
- (match_operand:<V_INT_EQUIV> 3 "register_operand")]
+ (match_operand:<VQ_INT_EQUIV> 3 "register_operand")]
UNSPEC_COPYSIGN))]
"TARGET_SIMD"
{@ [ cons: =0 , 1 , 2 , 3 ; attrs: type ]
[ w , w , w , 0 ; neon_bsl<q> ] bsl\t%0.<Vbtype>, %2.<Vbtype>, %1.<Vbtype>
[ w , 0 , w , w ; neon_bsl<q> ] bit\t%0.<Vbtype>, %2.<Vbtype>, %3.<Vbtype>
[ w , w , 0 , w ; neon_bsl<q> ] bif\t%0.<Vbtype>, %1.<Vbtype>, %3.<Vbtype>
- [ r , r , 0 , X ; bfm ] bfxil\t%<w1>0, %<w1>1, #0, <sizem1>
}
)
-
-;; For xorsign (x, y), we want to generate:
+;; For xorsignf (x, y), we want to generate:
;;
-;; LDR d2, #1<<63
-;; AND v3.8B, v1.8B, v2.8B
-;; EOR v0.8B, v0.8B, v3.8B
+;; movi v31.4s, 0x80, lsl 24
+;; and v31.16b, v31.16b, v1.16b
+;; eor v0.16b, v31.16b, v0.16b
;;
(define_expand "@xorsign<mode>3"
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index e376685..d3533f3 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -72,6 +72,9 @@ typedef __Poly16_t poly16_t;
typedef __Poly64_t poly64_t;
typedef __Poly128_t poly128_t;
+typedef __Mfloat8x8_t mfloat8x8_t;
+typedef __Mfloat8x16_t mfloat8x16_t;
+
typedef __fp16 float16_t;
typedef float float32_t;
typedef double float64_t;
@@ -26949,9 +26952,9 @@ vrax1q_u64 (uint64x2_t __a, uint64x2_t __b)
__extension__ extern __inline uint64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-vxarq_u64 (uint64x2_t __a, uint64x2_t __b, const int imm6)
+vxarq_u64 (uint64x2_t __a, uint64x2_t __b, const int __imm6)
{
- return __builtin_aarch64_xarqv2di_uuus (__a, __b,imm6);
+ return __builtin_aarch64_xarqv2di_uuus (__a, __b, __imm6);
}
__extension__ extern __inline uint8x16_t
diff --git a/gcc/config/aarch64/arm_private_fp8.h b/gcc/config/aarch64/arm_private_fp8.h
index 5668cc2..f787022 100644
--- a/gcc/config/aarch64/arm_private_fp8.h
+++ b/gcc/config/aarch64/arm_private_fp8.h
@@ -40,6 +40,8 @@ extern "C"
{
#endif
+ typedef __mfp8 mfloat8_t;
+
typedef uint64_t fpm_t;
enum __ARM_FPM_FORMAT
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index f491e4b..647941c3 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -464,21 +464,25 @@
"@internal
A constraint that matches vector of immediates for orr."
(and (match_code "const_vector")
- (match_test "aarch64_simd_valid_immediate (op, NULL,
- AARCH64_CHECK_ORR)")))
+ (match_test "aarch64_simd_valid_orr_imm (op)")))
(define_constraint "Db"
"@internal
- A constraint that matches vector of immediates for bic."
+ A constraint that matches vector of immediates for and/bic."
(and (match_code "const_vector")
- (match_test "aarch64_simd_valid_immediate (op, NULL,
- AARCH64_CHECK_BIC)")))
+ (match_test "aarch64_simd_valid_and_imm (op)")))
+
+(define_constraint "De"
+ "@internal
+ A constraint that matches vector of immediates for xor."
+ (and (match_code "const_vector")
+ (match_test "aarch64_simd_valid_xor_imm (op)")))
(define_constraint "Dn"
"@internal
A constraint that matches vector of immediates."
(and (match_code "const,const_vector")
- (match_test "aarch64_simd_valid_immediate (op, NULL)")))
+ (match_test "aarch64_simd_valid_mov_imm (op)")))
(define_constraint "Dh"
"@internal
diff --git a/gcc/config/aarch64/driver-aarch64.cc b/gcc/config/aarch64/driver-aarch64.cc
index b620351..abe6e7d 100644
--- a/gcc/config/aarch64/driver-aarch64.cc
+++ b/gcc/config/aarch64/driver-aarch64.cc
@@ -256,9 +256,9 @@ host_detect_local_cpu (int argc, const char **argv)
bool cpu = false;
unsigned int i = 0;
unsigned char imp = INVALID_IMP;
- unsigned int cores[2] = { INVALID_CORE, INVALID_CORE };
+ unsigned int cores[3] = { INVALID_CORE, INVALID_CORE, INVALID_CORE };
unsigned int n_cores = 0;
- unsigned int variants[2] = { ALL_VARIANTS, ALL_VARIANTS };
+ unsigned int variants[3] = { ALL_VARIANTS, ALL_VARIANTS, ALL_VARIANTS };
unsigned int n_variants = 0;
bool processed_exts = false;
aarch64_feature_flags extension_flags = 0;
@@ -314,7 +314,7 @@ host_detect_local_cpu (int argc, const char **argv)
unsigned cvariant = parse_field (buf);
if (!contains_core_p (variants, cvariant))
{
- if (n_variants == 2)
+ if (n_variants == 3)
goto not_found;
variants[n_variants++] = cvariant;
@@ -326,7 +326,7 @@ host_detect_local_cpu (int argc, const char **argv)
unsigned ccore = parse_field (buf);
if (!contains_core_p (cores, ccore))
{
- if (n_cores == 2)
+ if (n_cores == 3)
goto not_found;
cores[n_cores++] = ccore;
@@ -383,11 +383,15 @@ host_detect_local_cpu (int argc, const char **argv)
/* Weird cpuinfo format that we don't know how to handle. */
if (n_cores == 0
|| n_cores > 2
- || (n_cores == 1 && n_variants != 1)
|| imp == INVALID_IMP
|| !processed_exts)
goto not_found;
+ /* If we have one core type but multiple variants, consider
+ that as one variant with ALL_VARIANTS instead. */
+ if (n_cores == 1 && n_variants != 1)
+ variants[0] = ALL_VARIANTS;
+
/* Simple case, one core type or just looking for the arch. */
if (n_cores == 1 || arch)
{
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 20a318e..8269b0c 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -290,6 +290,8 @@
;; Advanced SIMD modes for H, S and D types.
(define_mode_iterator VDQHSD [V4HI V8HI V2SI V4SI V2DI])
+(define_mode_iterator VDQHSD_V1DI [VDQHSD V1DI])
+
;; Advanced SIMD and scalar integer modes for H and S.
(define_mode_iterator VSDQ_HSI [V4HI V8HI V2SI V4SI HI SI])
@@ -444,6 +446,9 @@
;; All fully-packed SVE integer vector modes.
(define_mode_iterator SVE_FULL_I [VNx16QI VNx8HI VNx4SI VNx2DI])
+;; All fully-packed SVE integer and Advanced SIMD integer modes.
+(define_mode_iterator SVE_ASIMD_FULL_I [SVE_FULL_I VDQ_I])
+
;; All fully-packed SVE floating-point vector modes.
(define_mode_iterator SVE_FULL_F [VNx8HF VNx4SF VNx2DF])
@@ -519,15 +524,20 @@
VNx4HI VNx2HI
VNx2SI])
+;; All SVE integer vector modes.
+(define_mode_iterator SVE_I [VNx16QI VNx8QI VNx4QI VNx2QI
+ VNx8HI VNx4HI VNx2HI
+ VNx4SI VNx2SI
+ VNx2DI])
+
+;; All SVE floating-point vector modes.
+(define_mode_iterator SVE_F [VNx8HF VNx4HF VNx2HF
+ VNx8BF VNx4BF VNx2BF
+ VNx4SF VNx2SF
+ VNx2DF])
+
;; All SVE vector modes.
-(define_mode_iterator SVE_ALL [VNx16QI VNx8QI VNx4QI VNx2QI
- VNx8HI VNx4HI VNx2HI
- VNx8HF VNx4HF VNx2HF
- VNx8BF VNx4BF VNx2BF
- VNx4SI VNx2SI
- VNx4SF VNx2SF
- VNx2DI
- VNx2DF])
+(define_mode_iterator SVE_ALL [SVE_I SVE_F])
;; All SVE 2-vector modes.
(define_mode_iterator SVE_FULLx2 [VNx32QI VNx16HI VNx8SI VNx4DI
@@ -549,18 +559,12 @@
;; All SVE vector and structure modes.
(define_mode_iterator SVE_ALL_STRUCT [SVE_ALL SVE_STRUCT])
-;; All SVE integer vector modes.
-(define_mode_iterator SVE_I [VNx16QI VNx8QI VNx4QI VNx2QI
- VNx8HI VNx4HI VNx2HI
- VNx4SI VNx2SI
- VNx2DI])
-
;; All SVE integer vector modes and Advanced SIMD 64-bit vector
;; element modes
(define_mode_iterator SVE_I_SIMD_DI [SVE_I V2DI])
;; All SVE and Advanced SIMD integer vector modes.
-(define_mode_iterator SVE_VDQ_I [SVE_I VDQ_I])
+(define_mode_iterator SVE_VDQ_I [SVE_I VDQ_I V1DI])
;; SVE integer vector modes whose elements are 16 bits or wider.
(define_mode_iterator SVE_HSDI [VNx8HI VNx4HI VNx2HI
@@ -841,6 +845,8 @@
UNSPEC_COND_CMPNE_WIDE ; Used in aarch64-sve.md.
UNSPEC_COND_FABS ; Used in aarch64-sve.md.
UNSPEC_COND_FADD ; Used in aarch64-sve.md.
+ UNSPEC_COND_FAMAX ; Used in aarch64-sve.md.
+ UNSPEC_COND_FAMIN ; Used in aarch64-sve.md.
UNSPEC_COND_FCADD90 ; Used in aarch64-sve.md.
UNSPEC_COND_FCADD270 ; Used in aarch64-sve.md.
UNSPEC_COND_FCMEQ ; Used in aarch64-sve.md.
@@ -881,6 +887,8 @@
UNSPEC_COND_FSQRT ; Used in aarch64-sve.md.
UNSPEC_COND_FSUB ; Used in aarch64-sve.md.
UNSPEC_COND_SCVTF ; Used in aarch64-sve.md.
+ UNSPEC_COND_SMAX ; Used in aarch64-sve.md.
+ UNSPEC_COND_SMIN ; Used in aarch64-sve.md.
UNSPEC_COND_UCVTF ; Used in aarch64-sve.md.
UNSPEC_LASTA ; Used in aarch64-sve.md.
UNSPEC_LASTB ; Used in aarch64-sve.md.
@@ -1057,6 +1065,8 @@
UNSPEC_BFCVTN2 ; Used in aarch64-simd.md.
UNSPEC_BFCVT ; Used in aarch64-simd.md.
UNSPEC_FCVTXN ; Used in aarch64-simd.md.
+ UNSPEC_FAMAX ; Used in aarch64-simd.md.
+ UNSPEC_FAMIN ; Used in aarch64-simd.md.
;; All used in aarch64-sve2.md
UNSPEC_FCVTN
@@ -1230,7 +1240,7 @@
(define_mode_attr bitsize [(V8QI "64") (V16QI "128")
(V4HI "64") (V8HI "128")
(V2SI "64") (V4SI "128")
- (V2DI "128")])
+ (V1DI "64") (V2DI "128")])
;; Map a floating point or integer mode to the appropriate register name prefix
(define_mode_attr s [(HF "h") (SF "s") (DF "d") (SI "s") (DI "d")])
@@ -1886,6 +1896,14 @@
(VNx8SF "vnx8si") (VNx16SF "vnx16si")
])
+;; Mode with floating-point values replaced by 128-bit vector integers.
+(define_mode_attr VQ_INT_EQUIV [(DF "V2DI") (SF "V4SI")
+])
+
+;; Lower case mode with floating-point values replaced by 128-bit vector integers.
+(define_mode_attr vq_int_equiv [(DF "v2di") (SF "v4si")
+])
+
;; Floating-point equivalent of selected modes.
(define_mode_attr V_FP_EQUIV [(VNx8HI "VNx8HF") (VNx8HF "VNx8HF")
(VNx8BF "VNx8HF")
@@ -2284,7 +2302,7 @@
(VNx8DI "VNx2BI") (VNx8DF "VNx2BI")
(V8QI "VNx8BI") (V16QI "VNx16BI")
(V4HI "VNx4BI") (V8HI "VNx8BI") (V2SI "VNx2BI")
- (V4SI "VNx4BI") (V2DI "VNx2BI")])
+ (V4SI "VNx4BI") (V2DI "VNx2BI") (V1DI "VNx2BI")])
;; ...and again in lower case.
(define_mode_attr vpred [(VNx16QI "vnx16bi") (VNx8QI "vnx8bi")
@@ -2318,6 +2336,14 @@
(VNx4SI "VNx8SI") (VNx4SF "VNx8SF")
(VNx2DI "VNx4DI") (VNx2DF "VNx4DF")])
+;; The Advanced SIMD modes used for popcount, corresponding to scalar modes.
+(define_mode_attr VEC_POP_MODE [(QI "V8QI") (HI "V4HI")
+ (SI "V2SI") (DI "V1DI")])
+
+;; ...and again in lower case.
+(define_mode_attr vec_pop_mode [(QI "v8qi") (HI "v4hi")
+ (SI "v2si") (DI "v1di")])
+
;; On AArch64 the By element instruction doesn't have a 2S variant.
;; However because the instruction always selects a pair of values
;; The normal 3SAME instruction can be used here instead.
@@ -3079,15 +3105,20 @@
(define_int_iterator SVE_COND_FCVTI [UNSPEC_COND_FCVTZS UNSPEC_COND_FCVTZU])
(define_int_iterator SVE_COND_ICVTF [UNSPEC_COND_SCVTF UNSPEC_COND_UCVTF])
-(define_int_iterator SVE_COND_FP_BINARY [UNSPEC_COND_FADD
- UNSPEC_COND_FDIV
- UNSPEC_COND_FMAX
- UNSPEC_COND_FMAXNM
- UNSPEC_COND_FMIN
- UNSPEC_COND_FMINNM
- UNSPEC_COND_FMUL
- UNSPEC_COND_FMULX
- UNSPEC_COND_FSUB])
+(define_int_iterator SVE_COND_FP_BINARY
+ [UNSPEC_COND_FADD
+ (UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX")
+ (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX")
+ UNSPEC_COND_FDIV
+ UNSPEC_COND_FMAX
+ UNSPEC_COND_FMAXNM
+ UNSPEC_COND_FMIN
+ UNSPEC_COND_FMINNM
+ UNSPEC_COND_FMUL
+ UNSPEC_COND_FMULX
+ UNSPEC_COND_FSUB
+ UNSPEC_COND_SMAX
+ UNSPEC_COND_SMIN])
;; Same as SVE_COND_FP_BINARY, but without codes that have a dedicated
;; <optab><mode>3 expander.
@@ -3098,7 +3129,9 @@
UNSPEC_COND_FMINNM
UNSPEC_COND_FMUL
UNSPEC_COND_FMULX
- UNSPEC_COND_FSUB])
+ UNSPEC_COND_FSUB
+ UNSPEC_COND_SMAX
+ UNSPEC_COND_SMIN])
(define_int_iterator SVE_COND_FP_BINARY_INT [UNSPEC_COND_FSCALE])
@@ -3110,10 +3143,15 @@
UNSPEC_COND_FMAXNM
UNSPEC_COND_FMIN
UNSPEC_COND_FMINNM
- UNSPEC_COND_FMUL])
+ UNSPEC_COND_FMUL
+ UNSPEC_COND_SMAX
+ UNSPEC_COND_SMIN])
-(define_int_iterator SVE_COND_FP_BINARY_REG [UNSPEC_COND_FDIV
- UNSPEC_COND_FMULX])
+(define_int_iterator SVE_COND_FP_BINARY_REG
+ [(UNSPEC_COND_FAMAX "TARGET_SVE_FAMINMAX")
+ (UNSPEC_COND_FAMIN "TARGET_SVE_FAMINMAX")
+ UNSPEC_COND_FDIV
+ UNSPEC_COND_FMULX])
(define_int_iterator SVE_COND_FCADD [UNSPEC_COND_FCADD90
UNSPEC_COND_FCADD270])
@@ -3121,12 +3159,12 @@
(define_int_iterator SVE_COND_FP_MAXMIN [UNSPEC_COND_FMAX
UNSPEC_COND_FMAXNM
UNSPEC_COND_FMIN
- UNSPEC_COND_FMINNM])
+ UNSPEC_COND_FMINNM
+ UNSPEC_COND_SMAX
+ UNSPEC_COND_SMIN])
-;; Floating-point max/min operations that correspond to optabs,
-;; as opposed to those that are internal to the port.
-(define_int_iterator SVE_COND_FP_MAXMIN_PUBLIC [UNSPEC_COND_FMAXNM
- UNSPEC_COND_FMINNM])
+(define_int_iterator SVE_COND_SMAXMIN [UNSPEC_COND_SMAX
+ UNSPEC_COND_SMIN])
(define_int_iterator SVE_COND_FP_TERNARY [UNSPEC_COND_FMLA
UNSPEC_COND_FMLS
@@ -3692,6 +3730,8 @@
(UNSPEC_ZIP2Q "zip2q")
(UNSPEC_COND_FABS "abs")
(UNSPEC_COND_FADD "add")
+ (UNSPEC_COND_FAMAX "famax")
+ (UNSPEC_COND_FAMIN "famin")
(UNSPEC_COND_FCADD90 "cadd90")
(UNSPEC_COND_FCADD270 "cadd270")
(UNSPEC_COND_FCMLA "fcmla")
@@ -3703,9 +3743,9 @@
(UNSPEC_COND_FCVTZU "fixuns_trunc")
(UNSPEC_COND_FDIV "div")
(UNSPEC_COND_FMAX "fmax_nan")
- (UNSPEC_COND_FMAXNM "smax")
+ (UNSPEC_COND_FMAXNM "fmax")
(UNSPEC_COND_FMIN "fmin_nan")
- (UNSPEC_COND_FMINNM "smin")
+ (UNSPEC_COND_FMINNM "fmin")
(UNSPEC_COND_FMLA "fma")
(UNSPEC_COND_FMLS "fnma")
(UNSPEC_COND_FMUL "mul")
@@ -3725,6 +3765,8 @@
(UNSPEC_COND_FSQRT "sqrt")
(UNSPEC_COND_FSUB "sub")
(UNSPEC_COND_SCVTF "float")
+ (UNSPEC_COND_SMAX "smax")
+ (UNSPEC_COND_SMIN "smin")
(UNSPEC_COND_UCVTF "floatuns")])
(define_int_attr fmaxmin [(UNSPEC_FMAX "fmax_nan")
@@ -3732,9 +3774,7 @@
(UNSPEC_FMAXNMV "fmax")
(UNSPEC_FMIN "fmin_nan")
(UNSPEC_FMINNM "fmin")
- (UNSPEC_FMINNMV "fmin")
- (UNSPEC_COND_FMAXNM "fmax")
- (UNSPEC_COND_FMINNM "fmin")])
+ (UNSPEC_FMINNMV "fmin")])
(define_int_attr maxmin_uns_op [(UNSPEC_UMAXV "umax")
(UNSPEC_UMINV "umin")
@@ -4228,6 +4268,8 @@
(UNSPEC_FTSSEL "ftssel")
(UNSPEC_COND_FABS "fabs")
(UNSPEC_COND_FADD "fadd")
+ (UNSPEC_COND_FAMAX "famax")
+ (UNSPEC_COND_FAMIN "famin")
(UNSPEC_COND_FCVTLT "fcvtlt")
(UNSPEC_COND_FCVTX "fcvtx")
(UNSPEC_COND_FDIV "fdiv")
@@ -4249,9 +4291,13 @@
(UNSPEC_COND_FRINTZ "frintz")
(UNSPEC_COND_FSCALE "fscale")
(UNSPEC_COND_FSQRT "fsqrt")
- (UNSPEC_COND_FSUB "fsub")])
+ (UNSPEC_COND_FSUB "fsub")
+ (UNSPEC_COND_SMAX "fmaxnm")
+ (UNSPEC_COND_SMIN "fminnm")])
(define_int_attr sve_fp_op_rev [(UNSPEC_COND_FADD "fadd")
+ (UNSPEC_COND_FAMAX "famax")
+ (UNSPEC_COND_FAMIN "famin")
(UNSPEC_COND_FDIV "fdivr")
(UNSPEC_COND_FMAX "fmax")
(UNSPEC_COND_FMAXNM "fmaxnm")
@@ -4259,7 +4305,9 @@
(UNSPEC_COND_FMINNM "fminnm")
(UNSPEC_COND_FMUL "fmul")
(UNSPEC_COND_FMULX "fmulx")
- (UNSPEC_COND_FSUB "fsubr")])
+ (UNSPEC_COND_FSUB "fsubr")
+ (UNSPEC_COND_SMAX "fmaxnm")
+ (UNSPEC_COND_SMIN "fminnm")])
(define_int_attr sme_int_op [(UNSPEC_SME_ADD_WRITE "add")
(UNSPEC_SME_SUB_WRITE "sub")])
@@ -4388,6 +4436,8 @@
;; <optab><mode>3 pattern.
(define_int_attr sve_pred_fp_rhs1_operand
[(UNSPEC_COND_FADD "register_operand")
+ (UNSPEC_COND_FAMAX "register_operand")
+ (UNSPEC_COND_FAMIN "register_operand")
(UNSPEC_COND_FDIV "register_operand")
(UNSPEC_COND_FMAX "register_operand")
(UNSPEC_COND_FMAXNM "register_operand")
@@ -4395,12 +4445,16 @@
(UNSPEC_COND_FMINNM "register_operand")
(UNSPEC_COND_FMUL "register_operand")
(UNSPEC_COND_FMULX "register_operand")
- (UNSPEC_COND_FSUB "aarch64_sve_float_arith_operand")])
+ (UNSPEC_COND_FSUB "aarch64_sve_float_arith_operand")
+ (UNSPEC_COND_SMAX "register_operand")
+ (UNSPEC_COND_SMIN "register_operand")])
;; The predicate to use for the second input operand in a floating-point
;; <optab><mode>3 pattern.
(define_int_attr sve_pred_fp_rhs2_operand
[(UNSPEC_COND_FADD "aarch64_sve_float_arith_with_sub_operand")
+ (UNSPEC_COND_FAMAX "register_operand")
+ (UNSPEC_COND_FAMIN "register_operand")
(UNSPEC_COND_FDIV "register_operand")
(UNSPEC_COND_FMAX "aarch64_sve_float_maxmin_operand")
(UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_operand")
@@ -4408,7 +4462,9 @@
(UNSPEC_COND_FMINNM "aarch64_sve_float_maxmin_operand")
(UNSPEC_COND_FMUL "aarch64_sve_float_mul_operand")
(UNSPEC_COND_FMULX "register_operand")
- (UNSPEC_COND_FSUB "register_operand")])
+ (UNSPEC_COND_FSUB "register_operand")
+ (UNSPEC_COND_SMAX "aarch64_sve_float_maxmin_operand")
+ (UNSPEC_COND_SMIN "aarch64_sve_float_maxmin_operand")])
;; Likewise for immediates only.
(define_int_attr sve_pred_fp_rhs2_immediate
@@ -4416,7 +4472,9 @@
(UNSPEC_COND_FMAXNM "aarch64_sve_float_maxmin_immediate")
(UNSPEC_COND_FMIN "aarch64_sve_float_maxmin_immediate")
(UNSPEC_COND_FMINNM "aarch64_sve_float_maxmin_immediate")
- (UNSPEC_COND_FMUL "aarch64_sve_float_mul_immediate")])
+ (UNSPEC_COND_FMUL "aarch64_sve_float_mul_immediate")
+ (UNSPEC_COND_SMAX "aarch64_sve_float_maxmin_immediate")
+ (UNSPEC_COND_SMIN "aarch64_sve_float_maxmin_immediate")])
;; The maximum number of element bits that an instruction can handle.
(define_int_attr max_elem_bits [(UNSPEC_UADDV "64") (UNSPEC_SADDV "32")
@@ -4463,3 +4521,16 @@
(UNSPECV_SET_FPCR "fpcr")])
(define_int_attr bits_etype [(8 "b") (16 "h") (32 "s") (64 "d")])
+
+;; Iterators and attributes for faminmax
+
+(define_int_iterator FAMINMAX_UNS [UNSPEC_FAMAX UNSPEC_FAMIN])
+
+(define_int_attr faminmax_cond_uns_op
+ [(UNSPEC_COND_SMAX "famax") (UNSPEC_COND_SMIN "famin")])
+
+(define_int_attr faminmax_uns_op
+ [(UNSPEC_FAMAX "famax") (UNSPEC_FAMIN "famin")])
+
+(define_code_attr faminmax_op
+ [(smax "famax") (smin "famin")])
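For reference, the FAMAX/FAMIN operations that these iterators and attributes wire up compute the maximum/minimum of the absolute values of their inputs. A scalar model (NaN handling simplified; the helper names are illustrative only, not part of the patch):

  #include <math.h>

  /* Scalar model of famax/famin: max/min of absolute values.  */
  static inline double famax_model (double a, double b) { return fmax (fabs (a), fabs (b)); }
  static inline double famin_model (double a, double b) { return fmin (fabs (a), fabs (b)); }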
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 8f3aab2..6ad9a4b 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -118,14 +118,17 @@
(define_predicate "aarch64_reg_or_orr_imm"
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
- (match_test "aarch64_simd_valid_immediate (op, NULL,
- AARCH64_CHECK_ORR)"))))
+ (match_test "aarch64_simd_valid_orr_imm (op)"))))
-(define_predicate "aarch64_reg_or_bic_imm"
+(define_predicate "aarch64_reg_or_and_imm"
(ior (match_operand 0 "register_operand")
(and (match_code "const_vector")
- (match_test "aarch64_simd_valid_immediate (op, NULL,
- AARCH64_CHECK_BIC)"))))
+ (match_test "aarch64_simd_valid_and_imm (op)"))))
+
+(define_predicate "aarch64_reg_or_xor_imm"
+ (ior (match_operand 0 "register_operand")
+ (and (match_code "const_vector")
+ (match_test "aarch64_simd_valid_xor_imm (op)"))))
(define_predicate "aarch64_fp_compare_operand"
(ior (match_operand 0 "register_operand")
@@ -945,11 +948,6 @@
(ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_sve_logical_immediate")))
-(define_predicate "aarch64_orr_imm_sve_advsimd"
- (ior (match_operand 0 "aarch64_reg_or_orr_imm")
- (and (match_test "TARGET_SVE")
- (match_operand 0 "aarch64_sve_logical_operand"))))
-
(define_predicate "aarch64_sve_gather_offset_b"
(ior (match_operand 0 "register_operand")
(match_operand 0 "aarch64_sve_gather_immediate_b")))
diff --git a/gcc/config/aarch64/tuning_models/fujitsu_monaka.h b/gcc/config/aarch64/tuning_models/fujitsu_monaka.h
new file mode 100644
index 0000000..c3a1e06
--- /dev/null
+++ b/gcc/config/aarch64/tuning_models/fujitsu_monaka.h
@@ -0,0 +1,65 @@
+/* Tuning model description for FUJITSU-MONAKA.
+ Copyright (C) 2009-2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_AARCH64_H_FUJITSU_MONAKA
+#define GCC_AARCH64_H_FUJITSU_MONAKA
+
+#include "generic.h"
+#include "generic_armv9_a.h"
+
+/* Tuning parameters for FUJITSU-MONAKA processor. It is copied from the
+ generic one except for the vector width for now. */
+static const struct tune_params fujitsu_monaka_tunings =
+{
+ &cortexa76_extra_costs,
+ &generic_armv9_a_addrcost_table,
+ &generic_armv9_a_regmove_cost,
+ &generic_armv9_a_vector_cost,
+ &generic_branch_cost,
+ &generic_approx_modes,
+ SVE_256, /* sve_width. */
+ { 4, /* load_int. */
+ 1, /* store_int. */
+ 6, /* load_fp. */
+ 2, /* store_fp. */
+ 6, /* load_pred. */
+ 1 /* store_pred. */
+ }, /* memmov_cost. */
+ 3, /* issue_rate. */
+ (AARCH64_FUSE_AES_AESMC | AARCH64_FUSE_CMP_BRANCH), /* fusible_ops. */
+ "32:16", /* function_align. */
+ "4", /* jump_align. */
+ "32:16", /* loop_align. */
+ 2, /* int_reassoc_width. */
+ 4, /* fp_reassoc_width. */
+ 2, /* fma_reassoc_width. */
+ 2, /* vec_reassoc_width. */
+ 2, /* min_div_recip_mul_sf. */
+ 2, /* min_div_recip_mul_df. */
+ 0, /* max_case_values. */
+ tune_params::AUTOPREFETCHER_WEAK, /* autoprefetcher_model. */
+ (AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
+ | AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
+ | AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
+ &generic_prefetch_tune,
+ AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
+ AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
+};
+
+#endif /* GCC_AARCH64_H_FUJITSU_MONAKA. */
diff --git a/gcc/config/aarch64/tuning_models/generic.h b/gcc/config/aarch64/tuning_models/generic.h
index 101969b..ee2f3ff 100644
--- a/gcc/config/aarch64/tuning_models/generic.h
+++ b/gcc/config/aarch64/tuning_models/generic.h
@@ -105,8 +105,8 @@ static const sve_vec_cost generic_sve_vector_cost =
2, /* fadda_f64_cost */
4, /* gather_load_x32_cost */
2, /* gather_load_x64_cost */
- 12, /* gather_load_x32_init_cost */
- 4, /* gather_load_x64_init_cost */
+ 0, /* gather_load_x32_init_cost */
+ 0, /* gather_load_x64_init_cost */
1 /* scatter_store_elt_cost */
};
diff --git a/gcc/config/aarch64/tuning_models/generic_armv9_a.h b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
index 999985e..76b3e4c 100644
--- a/gcc/config/aarch64/tuning_models/generic_armv9_a.h
+++ b/gcc/config/aarch64/tuning_models/generic_armv9_a.h
@@ -207,6 +207,18 @@ static const struct cpu_vector_cost generic_armv9_a_vector_cost =
&generic_armv9_a_vec_issue_info /* issue_info */
};
+/* Generic prefetch settings (which disable prefetch). */
+static const cpu_prefetch_tune generic_armv9a_prefetch_tune =
+{
+ 0, /* num_slots */
+ -1, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ -1, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
static const struct tune_params generic_armv9_a_tunings =
{
&cortexa76_extra_costs,
@@ -239,7 +251,7 @@ static const struct tune_params generic_armv9_a_tunings =
(AARCH64_EXTRA_TUNE_CHEAP_SHIFT_EXTEND
| AARCH64_EXTRA_TUNE_USE_NEW_VECTOR_COSTS
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT), /* tune_flags. */
- &generic_prefetch_tune,
+ &generic_armv9a_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
};
diff --git a/gcc/config/aarch64/tuning_models/neoversev2.h b/gcc/config/aarch64/tuning_models/neoversev2.h
index 52aad7d..e7e37e6b 100644
--- a/gcc/config/aarch64/tuning_models/neoversev2.h
+++ b/gcc/config/aarch64/tuning_models/neoversev2.h
@@ -206,6 +206,19 @@ static const struct cpu_vector_cost neoversev2_vector_cost =
&neoversev2_vec_issue_info /* issue_info */
};
+/* Prefetch settings. Disable software prefetch generation but set L1 cache
+ line size. */
+static const cpu_prefetch_tune neoversev2_prefetch_tune =
+{
+ 0, /* num_slots */
+ -1, /* l1_cache_size */
+ 64, /* l1_cache_line_size */
+ -1, /* l2_cache_size */
+ true, /* prefetch_dynamic_strides */
+ -1, /* minimum_stride */
+ -1 /* default_opt_level */
+};
+
static const struct tune_params neoversev2_tunings =
{
&cortexa76_extra_costs,
@@ -244,7 +257,7 @@ static const struct tune_params neoversev2_tunings =
| AARCH64_EXTRA_TUNE_MATCHED_VECTOR_THROUGHPUT
| AARCH64_EXTRA_TUNE_AVOID_PRED_RMW
| AARCH64_EXTRA_TUNE_FULLY_PIPELINED_FMA), /* tune_flags. */
- &generic_prefetch_tune,
+ &neoversev2_prefetch_tune,
AARCH64_LDP_STP_POLICY_ALWAYS, /* ldp_policy_model. */
AARCH64_LDP_STP_POLICY_ALWAYS /* stp_policy_model. */
};
diff --git a/gcc/config/alpha/alpha.cc b/gcc/config/alpha/alpha.cc
index 74631a4..d7f5e3b 100644
--- a/gcc/config/alpha/alpha.cc
+++ b/gcc/config/alpha/alpha.cc
@@ -3269,7 +3269,7 @@ alpha_emit_xfloating_cvt (enum rtx_code orig_code, rtx operands[])
set (OP[1] OP[3])
is valid. Naturally, output operand ordering is little-endian.
This is used by *movtf_internal and *movti_internal. */
-
+
void
alpha_split_tmode_pair (rtx operands[4], machine_mode mode,
bool fixup_overlap)
@@ -4410,7 +4410,7 @@ emit_insxl (machine_mode mode, rtx op1, rtx op2)
}
/* Expand an atomic fetch-and-operate pattern. CODE is the binary operation
- to perform. MEM is the memory on which to operate. VAL is the second
+ to perform. MEM is the memory on which to operate. VAL is the second
operand of the binary operator. BEFORE and AFTER are optional locations to
return the value of MEM either before of after the operation. SCRATCH is
a scratch register. */
@@ -4594,7 +4594,7 @@ alpha_split_compare_and_swap_12 (rtx operands[])
label2 = gen_rtx_LABEL_REF (DImode, gen_label_rtx ());
emit_insn (gen_load_locked (DImode, scratch, mem));
-
+
width = GEN_INT (GET_MODE_BITSIZE (mode));
mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
emit_insn (gen_extxl (dest, scratch, width, addr));
@@ -4725,7 +4725,7 @@ alpha_split_atomic_exchange_12 (rtx operands[])
emit_label (XEXP (label, 0));
emit_insn (gen_load_locked (DImode, scratch, mem));
-
+
width = GEN_INT (GET_MODE_BITSIZE (mode));
mask = GEN_INT (mode == QImode ? 0xff : 0xffff);
emit_insn (gen_extxl (dest, scratch, width, addr));
@@ -5019,7 +5019,7 @@ get_trap_mode_suffix (void)
gcc_unreachable ();
}
break;
-
+
default:
gcc_unreachable ();
}
@@ -5056,7 +5056,7 @@ get_round_mode_suffix (void)
case ROUND_SUFFIX_C:
return "c";
-
+
default:
gcc_unreachable ();
}
@@ -6151,7 +6151,7 @@ alpha_setup_incoming_varargs (cumulative_args_t pcum,
/* Detect whether integer registers or floating-point registers
are needed by the detected va_arg statements. See above for
how these values are computed. Note that the "escape" value
- is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
+ is VA_LIST_MAX_FPR_SIZE, which is 255, which has both of
these bits set. */
gcc_assert ((VA_LIST_MAX_FPR_SIZE & 3) == 3);
@@ -6754,7 +6754,7 @@ alpha_fold_builtin_cmpbge (unsigned HOST_WIDE_INT opint[], long op_const)
return NULL;
}
-/* Fold the builtin for the ZAPNOT instruction. This is essentially a
+/* Fold the builtin for the ZAPNOT instruction. This is essentially a
specialized form of an AND operation. Other byte manipulation instructions
are defined in terms of this instruction, so this is also used as a
subroutine for other builtins.
@@ -6821,7 +6821,7 @@ alpha_fold_builtin_extxx (tree op[], unsigned HOST_WIDE_INT opint[],
else
zap_op = op;
}
-
+
opint[1] = bytemask;
return alpha_fold_builtin_zapnot (zap_op, opint, zap_const);
}
@@ -7422,7 +7422,7 @@ alpha_vms_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
HOST_WIDE_INT
alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
-{
+{
/* The only possible attempts we ever expect are ARG or FRAME_PTR to
HARD_FRAME or STACK_PTR. We need the alpha_procedure_type to decide
on the proper computations and will need the register save area size
@@ -7433,7 +7433,7 @@ alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
/* PT_NULL procedures have no frame of their own and we only allow
elimination to the stack pointer. This is the argument pointer and we
resolve the soft frame pointer to that as well. */
-
+
if (alpha_procedure_type == PT_NULL)
return 0;
@@ -7448,13 +7448,13 @@ alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
^ ^ ^ ^
ARG_PTR FRAME_PTR HARD_FRAME_PTR STACK_PTR
-
+
PT_REGISTER procedures are similar in that they may have a frame of their
own. They have no regs-sa/pv/outgoing-args area.
We first compute offset to HARD_FRAME_PTR, then add what we need to get
to STACK_PTR if need be. */
-
+
{
HOST_WIDE_INT offset;
HOST_WIDE_INT pv_save_size = alpha_procedure_type == PT_STACK ? 8 : 0;
@@ -7473,10 +7473,10 @@ alpha_vms_initial_elimination_offset (unsigned int from, unsigned int to)
default:
gcc_unreachable ();
}
-
+
if (to == STACK_POINTER_REGNUM)
offset += ALPHA_ROUND (crtl->outgoing_args_size);
-
+
return offset;
}
}
@@ -8828,7 +8828,7 @@ alpha_handle_trap_shadows (void)
suitably aligned. This is very processor-specific. */
/* There are a number of entries in alphaev4_insn_pipe and alphaev5_insn_pipe
that are marked "fake". These instructions do not exist on that target,
- but it is possible to see these insns with deranged combinations of
+ but it is possible to see these insns with deranged combinations of
command-line options, such as "-mtune=ev4 -mmax". Instead of aborting,
choose a result at random. */
@@ -9465,7 +9465,7 @@ And in the noreturn case:
after the insn. In case trap is the last insn in the function,
emit NOP to guarantee that PC remains inside function boundaries.
This workaround is needed to get reliable backtraces. */
-
+
rtx_insn *insn = prev_active_insn (get_last_insn ());
if (insn && NONJUMP_INSN_P (insn))
@@ -9725,7 +9725,7 @@ alpha_write_linkage (FILE *stream, const char *funname)
the section; 0 if the default should be used. */
static void
-vms_asm_named_section (const char *name, unsigned int flags,
+vms_asm_named_section (const char *name, unsigned int flags,
tree decl ATTRIBUTE_UNUSED)
{
fputc ('\n', asm_out_file);
diff --git a/gcc/config/alpha/driver-alpha.cc b/gcc/config/alpha/driver-alpha.cc
index 816d06b..16f0e7f 100644
--- a/gcc/config/alpha/driver-alpha.cc
+++ b/gcc/config/alpha/driver-alpha.cc
@@ -33,7 +33,7 @@ along with GCC; see the file COPYING3. If not see
/* Bit defines for amask instruction. */
#define AMASK_BWX 0x1 /* byte/word extension. */
-#define AMASK_FIX 0x2 /* sqrt and f <-> i conversions
+#define AMASK_FIX 0x2 /* sqrt and f <-> i conversions
extension. */
#define AMASK_CIX 0x4 /* count extension. */
#define AMASK_MVI 0x100 /* multimedia extension. */
diff --git a/gcc/config/alpha/elf.h b/gcc/config/alpha/elf.h
index a10454a..d4f1d40 100644
--- a/gcc/config/alpha/elf.h
+++ b/gcc/config/alpha/elf.h
@@ -25,7 +25,7 @@ along with GCC; see the file COPYING3. If not see
#define ASM_SPEC "%{G*} %{relax:-relax} %{mcpu=*:-m%*}"
/* Do not output a .file directive at the beginning of the input file. */
-
+
#undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
#define TARGET_ASM_FILE_START_FILE_DIRECTIVE false
diff --git a/gcc/config/alpha/vms.h b/gcc/config/alpha/vms.h
index 8038f0e..4226804 100644
--- a/gcc/config/alpha/vms.h
+++ b/gcc/config/alpha/vms.h
@@ -188,8 +188,8 @@ typedef struct {int num_args; enum avms_arg_type atypes[6];} avms_arg_info;
#define ASM_OUTPUT_CASE_LABEL(FILE,PREFIX,NUM,TABLEINSN) \
{ ASM_OUTPUT_ALIGN (FILE, 3); (*targetm.asm_out.internal_label) (FILE, PREFIX, NUM); }
-/* This says how to output assembler code to declare an
- uninitialized external linkage data object. */
+/* This says how to output assembler code to declare an
+ uninitialized external linkage data object. */
#define COMMON_ASM_OP "\t.comm\t"
diff --git a/gcc/config/arc/arc.cc b/gcc/config/arc/arc.cc
index c800226..95d3285 100644
--- a/gcc/config/arc/arc.cc
+++ b/gcc/config/arc/arc.cc
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -721,7 +722,7 @@ static rtx arc_legitimize_address_0 (rtx, rtx, machine_mode mode);
arc_no_speculation_in_delay_slots_p
#undef TARGET_LRA_P
-#define TARGET_LRA_P arc_lra_p
+#define TARGET_LRA_P hook_bool_void_true
#define TARGET_REGISTER_PRIORITY arc_register_priority
/* Stores with scaled offsets have different displacement ranges. */
#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
@@ -4228,7 +4229,7 @@ enum arc_shift_alg
{
SHIFT_MOVE, /* Register-to-register move. */
SHIFT_LOOP, /* Zero-overhead loop implementation. */
- SHIFT_INLINE, /* Mmultiple LSHIFTs and LSHIFT-PLUSs. */
+ SHIFT_INLINE, /* Mmultiple LSHIFTs and LSHIFT-PLUSs. */
SHIFT_AND_ROT, /* Bitwise AND, then ROTATERTs. */
SHIFT_SWAP, /* SWAP then multiple LSHIFTs/LSHIFT-PLUSs. */
SHIFT_AND_SWAP_ROT /* Bitwise AND, then SWAP, then ROTATERTs. */
@@ -9674,7 +9675,7 @@ arc_delegitimize_address (rtx orig_x)
rtx
gen_acc1 (void)
{
- return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56: 57);
+ return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 56 : 57);
}
/* Return a REG rtx for acc2. N.B. the gcc-internal representation may
@@ -9684,7 +9685,7 @@ gen_acc1 (void)
rtx
gen_acc2 (void)
{
- return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57: 56);
+ return gen_rtx_REG (SImode, TARGET_BIG_ENDIAN ? 57 : 56);
}
/* When estimating sizes during arc_reorg, when optimizing for speed, there
@@ -10156,14 +10157,6 @@ arc_eh_uses (int regno)
return false;
}
-/* Return true if we use LRA instead of reload pass. */
-
-bool
-arc_lra_p (void)
-{
- return arc_lra_flag;
-}
-
/* ??? Should we define TARGET_REGISTER_PRIORITY? We might perfer to
use q registers, because some insn are shorter with them. OTOH we
already have separate alternatives for this purpose, and other
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index 0a1ecb7..1d3bc37 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -608,8 +608,8 @@ extern enum reg_class arc_regno_reg_class[];
needed to represent mode MODE in a register of class CLASS. */
#define CLASS_MAX_NREGS(CLASS, MODE) \
-(( GET_MODE_SIZE (MODE) == 16 && CLASS == SIMD_VR_REGS) ? 1: \
-((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
+((GET_MODE_SIZE (MODE) == 16 && CLASS == SIMD_VR_REGS) ? 1 \
+ : ((GET_MODE_SIZE (MODE) + UNITS_PER_WORD - 1) / UNITS_PER_WORD))
#define SMALL_INT(X) ((unsigned) ((X) + 0x100) < 0x200)
#define SMALL_INT_RANGE(X, OFFSET, SHIFT) \
@@ -868,9 +868,9 @@ extern int arc_initial_elimination_offset(int from, int to);
/* Recognize any constant value that is a valid address. */
#define CONSTANT_ADDRESS_P(X) \
- (flag_pic ? (arc_legitimate_pic_addr_p (X) || LABEL_P (X)): \
- (GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
- || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST))
+ (flag_pic ? (arc_legitimate_pic_addr_p (X) || LABEL_P (X)) \
+ : (GET_CODE (X) == LABEL_REF || GET_CODE (X) == SYMBOL_REF \
+ || GET_CODE (X) == CONST_INT || GET_CODE (X) == CONST))
/* Is the argument a const_int rtx, containing an exact power of 2 */
#define IS_POWEROF2_P(X) (! ( (X) & ((X) - 1)) && (X))
@@ -1660,8 +1660,4 @@ enum
/* The default option for BI/BIH instructions. */
#define DEFAULT_BRANCH_INDEX 0
-#ifndef TARGET_LRA
-#define TARGET_LRA arc_lra_p()
-#endif
-
#endif /* GCC_ARC_H */
diff --git a/gcc/config/arc/arc.opt b/gcc/config/arc/arc.opt
index 5abb297..7b93183 100644
--- a/gcc/config/arc/arc.opt
+++ b/gcc/config/arc/arc.opt
@@ -401,8 +401,8 @@ Pass -marclinux_prof option through to linker.
;; lra is still unproven for ARC, so allow to fall back to reload with -mno-lra.
mlra
-Target Var(arc_lra_flag) Init(1) Save
-Use LRA instead of reload.
+Target Ignore
+Does nothing. Preserved for backward compatibility.
mlra-priority-none
Target RejectNegative Var(arc_lra_priority_tag, ARC_LRA_PRIORITY_NONE)
diff --git a/gcc/config/arc/simdext.md b/gcc/config/arc/simdext.md
index 4e51a23..0696f0a 100644
--- a/gcc/config/arc/simdext.md
+++ b/gcc/config/arc/simdext.md
@@ -1643,7 +1643,7 @@
;; We can use dmac as well here. To be investigated which version
;; brings more.
-(define_expand "sdot_prodv2hi"
+(define_expand "sdot_prodsiv2hi"
[(match_operand:SI 0 "register_operand" "")
(match_operand:V2HI 1 "register_operand" "")
(match_operand:V2HI 2 "register_operand" "")
@@ -1656,7 +1656,7 @@
DONE;
})
-(define_expand "udot_prodv2hi"
+(define_expand "udot_prodsiv2hi"
[(match_operand:SI 0 "register_operand" "")
(match_operand:V2HI 1 "register_operand" "")
(match_operand:V2HI 2 "register_operand" "")
@@ -1669,7 +1669,7 @@
DONE;
})
-(define_expand "sdot_prodv4hi"
+(define_expand "sdot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand" "")
(match_operand:V4HI 1 "register_operand" "")
(match_operand:V4HI 2 "register_operand" "")
@@ -1688,7 +1688,7 @@
DONE;
})
-(define_expand "udot_prodv4hi"
+(define_expand "udot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand" "")
(match_operand:V4HI 1 "register_operand" "")
(match_operand:V4HI 2 "register_operand" "")
diff --git a/gcc/config/arm/aarch-common.cc b/gcc/config/arm/aarch-common.cc
index aa405af..44012fe 100644
--- a/gcc/config/arm/aarch-common.cc
+++ b/gcc/config/arm/aarch-common.cc
@@ -23,6 +23,7 @@
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/arm/aout.h b/gcc/config/arm/aout.h
index db9e8dd..0c32c1e 100644
--- a/gcc/config/arm/aout.h
+++ b/gcc/config/arm/aout.h
@@ -1,7 +1,7 @@
/* Definitions of target machine for GNU compiler, for ARM with a.out
Copyright (C) 1995-2024 Free Software Foundation, Inc.
Contributed by Richard Earnshaw (rearnsha@armltd.co.uk).
-
+
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
@@ -165,7 +165,7 @@
#define ASM_GENERATE_INTERNAL_LABEL(STRING, PREFIX, NUM) \
sprintf (STRING, "*%s%s%u", LOCAL_LABEL_PREFIX, PREFIX, (unsigned int)(NUM))
#endif
-
+
/* Output an element of a dispatch table. */
#define ASM_OUTPUT_ADDR_VEC_ELT(STREAM, VALUE) \
do \
@@ -174,7 +174,7 @@
asm_fprintf (STREAM, "\t.word\t%LL%d\n", VALUE); \
} \
while (0)
-
+
/* Thumb-2 always uses addr_diff_elf so that the Table Branch instructions
can be used. For non-pic code where the offsets do not suitable for
@@ -266,7 +266,7 @@
fprintf (STREAM, "\t.space\t%d\n", (int) (NBYTES))
/* Align output to a power of two. Horrible /bin/as. */
-#ifndef ASM_OUTPUT_ALIGN
+#ifndef ASM_OUTPUT_ALIGN
#define ASM_OUTPUT_ALIGN(STREAM, POWER) \
do \
{ \
@@ -292,7 +292,7 @@
} \
while (0)
#endif
-
+
/* Output a local common block. /bin/as can't do this, so hack a
`.space' into the bss segment. Note that this is *bad* practice,
which is guaranteed NOT to work since it doesn't define STATIC
@@ -308,7 +308,7 @@
} \
while (0)
#endif
-
+
/* Output a zero-initialized block. */
#ifndef ASM_OUTPUT_ALIGNED_BSS
#define ASM_OUTPUT_ALIGNED_BSS(STREAM, DECL, NAME, SIZE, ALIGN) \
diff --git a/gcc/config/arm/arm-builtins.cc b/gcc/config/arm/arm-builtins.cc
index c9d50bf..6ee1563 100644
--- a/gcc/config/arm/arm-builtins.cc
+++ b/gcc/config/arm/arm-builtins.cc
@@ -477,19 +477,6 @@ arm_ternop_unone_unone_none_none_qualifiers[SIMD_MAX_BUILTIN_ARGS]
(arm_ternop_unone_unone_none_none_qualifiers)
static enum arm_type_qualifiers
-arm_ternop_unone_none_unone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_none, qualifier_unsigned,
- qualifier_immediate };
-#define TERNOP_UNONE_NONE_UNONE_IMM_QUALIFIERS \
- (arm_ternop_unone_none_unone_imm_qualifiers)
-
-static enum arm_type_qualifiers
-arm_ternop_none_none_unone_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_none, qualifier_unsigned, qualifier_immediate };
-#define TERNOP_NONE_NONE_UNONE_IMM_QUALIFIERS \
- (arm_ternop_none_none_unone_imm_qualifiers)
-
-static enum arm_type_qualifiers
arm_ternop_unone_unone_none_imm_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_unsigned, qualifier_none,
qualifier_immediate };
@@ -624,16 +611,6 @@ arm_quadop_unone_unone_unone_none_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
(arm_quadop_unone_unone_unone_none_pred_qualifiers)
static enum arm_type_qualifiers
-arm_strs_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_void, qualifier_pointer, qualifier_none };
-#define STRS_QUALIFIERS (arm_strs_qualifiers)
-
-static enum arm_type_qualifiers
-arm_stru_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_void, qualifier_pointer, qualifier_unsigned };
-#define STRU_QUALIFIERS (arm_stru_qualifiers)
-
-static enum arm_type_qualifiers
arm_strss_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_void, qualifier_pointer, qualifier_unsigned,
qualifier_none};
@@ -657,17 +634,6 @@ arm_strsbu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define STRSBU_QUALIFIERS (arm_strsbu_qualifiers)
static enum arm_type_qualifiers
-arm_strs_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_void, qualifier_pointer, qualifier_none, qualifier_predicate};
-#define STRS_P_QUALIFIERS (arm_strs_p_qualifiers)
-
-static enum arm_type_qualifiers
-arm_stru_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_void, qualifier_pointer, qualifier_unsigned,
- qualifier_predicate};
-#define STRU_P_QUALIFIERS (arm_stru_p_qualifiers)
-
-static enum arm_type_qualifiers
arm_strsu_p_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_void, qualifier_pointer, qualifier_unsigned,
qualifier_unsigned, qualifier_predicate};
@@ -702,16 +668,6 @@ arm_ldrgs_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define LDRGS_QUALIFIERS (arm_ldrgs_qualifiers)
static enum arm_type_qualifiers
-arm_ldrs_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_pointer};
-#define LDRS_QUALIFIERS (arm_ldrs_qualifiers)
-
-static enum arm_type_qualifiers
-arm_ldru_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_pointer};
-#define LDRU_QUALIFIERS (arm_ldru_qualifiers)
-
-static enum arm_type_qualifiers
arm_ldrgbs_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_none, qualifier_unsigned, qualifier_immediate};
#define LDRGBS_QUALIFIERS (arm_ldrgbs_qualifiers)
@@ -746,23 +702,6 @@ arm_ldrgu_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
#define LDRGU_Z_QUALIFIERS (arm_ldrgu_z_qualifiers)
static enum arm_type_qualifiers
-arm_ldrs_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_none, qualifier_pointer, qualifier_predicate};
-#define LDRS_Z_QUALIFIERS (arm_ldrs_z_qualifiers)
-
-static enum arm_type_qualifiers
-arm_ldru_z_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_pointer, qualifier_predicate};
-#define LDRU_Z_QUALIFIERS (arm_ldru_z_qualifiers)
-
-static enum arm_type_qualifiers
-arm_quinop_unone_unone_unone_unone_imm_pred_qualifiers[SIMD_MAX_BUILTIN_ARGS]
- = { qualifier_unsigned, qualifier_unsigned, qualifier_unsigned,
- qualifier_unsigned, qualifier_immediate, qualifier_predicate };
-#define QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED_QUALIFIERS \
- (arm_quinop_unone_unone_unone_unone_imm_pred_qualifiers)
-
-static enum arm_type_qualifiers
arm_ldrgbwbxu_qualifiers[SIMD_MAX_BUILTIN_ARGS]
= { qualifier_unsigned, qualifier_unsigned, qualifier_immediate};
#define LDRGBWBXU_QUALIFIERS (arm_ldrgbwbxu_qualifiers)
@@ -908,6 +847,13 @@ typedef struct {
enum arm_type_qualifiers *qualifiers;
} arm_builtin_datum;
+constexpr insn_code CODE_FOR_neon_sdotv8qi = CODE_FOR_neon_sdotv2siv8qi;
+constexpr insn_code CODE_FOR_neon_udotv8qi = CODE_FOR_neon_udotv2siv8qi;
+constexpr insn_code CODE_FOR_neon_usdotv8qi = CODE_FOR_neon_usdotv2siv8qi;
+constexpr insn_code CODE_FOR_neon_sdotv16qi = CODE_FOR_neon_sdotv4siv16qi;
+constexpr insn_code CODE_FOR_neon_udotv16qi = CODE_FOR_neon_udotv4siv16qi;
+constexpr insn_code CODE_FOR_neon_usdotv16qi = CODE_FOR_neon_usdotv4siv16qi;
+
#define CF(N,X) CODE_FOR_neon_##N##X
#define VAR1(T, N, A) \
diff --git a/gcc/config/arm/arm-mve-builtins-base.cc b/gcc/config/arm/arm-mve-builtins-base.cc
index e0ae593..2c8ff46 100644
--- a/gcc/config/arm/arm-mve-builtins-base.cc
+++ b/gcc/config/arm/arm-mve-builtins-base.cc
@@ -30,6 +30,7 @@
#include "basic-block.h"
#include "function.h"
#include "gimple.h"
+#include "emit-rtl.h"
#include "arm-mve-builtins.h"
#include "arm-mve-builtins-shapes.h"
#include "arm-mve-builtins-base.h"
@@ -39,6 +40,59 @@ using namespace arm_mve;
namespace {
+/* Implements vdupq_* intrinsics. */
+class vdupq_impl : public quiet<function_base>
+{
+public:
+ CONSTEXPR vdupq_impl (int unspec_for_m_n_sint,
+ int unspec_for_m_n_uint,
+ int unspec_for_m_n_fp)
+ : m_unspec_for_m_n_sint (unspec_for_m_n_sint),
+ m_unspec_for_m_n_uint (unspec_for_m_n_uint),
+ m_unspec_for_m_n_fp (unspec_for_m_n_fp)
+ {}
+ int m_unspec_for_m_n_sint;
+ int m_unspec_for_m_n_uint;
+ int m_unspec_for_m_n_fp;
+
+ rtx expand (function_expander &e) const override
+ {
+ gcc_assert (e.mode_suffix_id == MODE_n);
+
+ insn_code code;
+ machine_mode mode = e.vector_mode (0);
+
+ switch (e.pred)
+ {
+ case PRED_none:
+ /* No predicate, _n suffix. */
+ code = code_for_mve_vdupq_n (mode);
+ return e.use_exact_insn (code);
+
+ case PRED_m:
+ case PRED_x:
+ /* "m" or "x" predicate, _n suffix. */
+ if (e.type_suffix (0).integer_p)
+ if (e.type_suffix (0).unsigned_p)
+ code = code_for_mve_q_m_n (m_unspec_for_m_n_uint,
+ m_unspec_for_m_n_uint, mode);
+ else
+ code = code_for_mve_q_m_n (m_unspec_for_m_n_sint,
+ m_unspec_for_m_n_sint, mode);
+ else
+ code = code_for_mve_q_m_n_f (m_unspec_for_m_n_fp, mode);
+
+ if (e.pred == PRED_m)
+ return e.use_cond_insn (code, 0);
+ else
+ return e.use_pred_x_insn (code);
+
+ default:
+ gcc_unreachable ();
+ }
+ }
+};
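At the source level, the three cases handled by this expander correspond to the unpredicated, "m"-predicated and "x"-predicated vdupq forms. A usage sketch, assuming the ACLE spellings from <arm_mve.h>:

  #include <arm_mve.h>

  int32x4_t
  dup_examples (int32x4_t inactive, int32_t x, mve_pred16_t p)
  {
    int32x4_t a = vdupq_n_s32 (x);                 /* PRED_none: all lanes = x */
    int32x4_t b = vdupq_m_n_s32 (inactive, x, p);  /* PRED_m: false lanes taken from 'inactive' */
    int32x4_t c = vdupq_x_n_s32 (x, p);            /* PRED_x: false lanes undefined */
    return vaddq_s32 (vaddq_s32 (a, b), c);
  }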
+
/* Implements vreinterpretq_* intrinsics. */
class vreinterpretq_impl : public quiet<function_base>
{
@@ -96,16 +150,18 @@ public:
expand (function_expander &e) const override
{
insn_code icode;
- if (e.type_suffix (0).float_p)
- icode = code_for_mve_vld1q_f(e.vector_mode (0));
- else
+ switch (e.pred)
{
- if (e.type_suffix (0).unsigned_p)
- icode = code_for_mve_vld1q(VLD1Q_U,
- e.vector_mode (0));
- else
- icode = code_for_mve_vld1q(VLD1Q_S,
- e.vector_mode (0));
+ case PRED_none:
+ icode = code_for_mve_vldrq (e.vector_mode (0));
+ break;
+
+ case PRED_z:
+ icode = code_for_mve_vldrq_z (e.vector_mode (0));
+ break;
+
+ default:
+ gcc_unreachable ();
}
return e.use_contiguous_load_insn (icode);
}
@@ -124,21 +180,683 @@ public:
expand (function_expander &e) const override
{
insn_code icode;
- if (e.type_suffix (0).float_p)
- icode = code_for_mve_vst1q_f(e.vector_mode (0));
- else
+ switch (e.pred)
{
- if (e.type_suffix (0).unsigned_p)
- icode = code_for_mve_vst1q(VST1Q_U,
- e.vector_mode (0));
+ case PRED_none:
+ icode = code_for_mve_vstrq (e.vector_mode (0));
+ break;
+
+ case PRED_p:
+ icode = code_for_mve_vstrq_p (e.vector_mode (0));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return e.use_contiguous_store_insn (icode);
+ }
+};
+
+/* Builds the vstrq* intrinsics. */
+class vstrq_impl : public store_truncating
+{
+public:
+ using store_truncating::store_truncating;
+
+ unsigned int call_properties (const function_instance &) const override
+ {
+ return CP_WRITE_MEMORY;
+ }
+
+ rtx expand (function_expander &e) const override
+ {
+ insn_code icode;
+ switch (e.pred)
+ {
+ case PRED_none:
+ if (e.vector_mode (0) == e.memory_vector_mode ())
+ /* Non-truncating store case. */
+ icode = code_for_mve_vstrq (e.vector_mode (0));
else
- icode = code_for_mve_vst1q(VST1Q_S,
- e.vector_mode (0));
+ /* Truncating store case.
+ (there is only one possible truncation for each memory mode so only
+ one mode argument is needed). */
+ icode = code_for_mve_vstrq_truncate (e.memory_vector_mode ());
+ break;
+
+ case PRED_p:
+ if (e.vector_mode (0) == e.memory_vector_mode ())
+ icode = code_for_mve_vstrq_p (e.vector_mode (0));
+ else
+ icode = code_for_mve_vstrq_p_truncate (e.memory_vector_mode ());
+ break;
+
+ default:
+ gcc_unreachable ();
}
+
return e.use_contiguous_store_insn (icode);
}
};
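The truncating case this class distinguishes is, for example, vstrbq_s32, which narrows each 32-bit lane to 8 bits on the way to memory. A usage sketch assuming the <arm_mve.h> spellings:

  #include <arm_mve.h>

  void
  store_bytes (int8_t *base, int32x4_t v, mve_pred16_t p)
  {
    vstrbq_s32 (base, v);        /* PRED_none, truncating store */
    vstrbq_p_s32 (base, v, p);   /* PRED_p, predicated truncating store */
  }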
+/* Builds the vldrq* intrinsics. */
+class vldrq_impl : public load_extending
+{
+public:
+ using load_extending::load_extending;
+
+ unsigned int call_properties (const function_instance &) const override
+ {
+ return CP_READ_MEMORY;
+ }
+
+ rtx expand (function_expander &e) const override
+ {
+ insn_code icode;
+ switch (e.pred)
+ {
+ case PRED_none:
+ if (e.vector_mode (0) == e.memory_vector_mode ())
+ /* Non-extending load case. */
+ icode = code_for_mve_vldrq (e.vector_mode (0));
+ else
+ /* Extending load case.
+ (there is only one extension for each memory mode so only one type
+ argument is needed). */
+ icode = code_for_mve_vldrq_extend (e.memory_vector_mode (),
+ e.type_suffix (0).unsigned_p
+ ? ZERO_EXTEND
+ : SIGN_EXTEND);
+ break;
+
+ case PRED_z:
+ if (e.vector_mode (0) == e.memory_vector_mode ())
+ icode = code_for_mve_vldrq_z (e.vector_mode (0));
+ else
+ icode = code_for_mve_vldrq_z_extend (e.memory_vector_mode (),
+ e.type_suffix (0).unsigned_p
+ ? ZERO_EXTEND
+ : SIGN_EXTEND);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ return e.use_contiguous_load_insn (icode);
+ }
+};
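Correspondingly, the extending case is e.g. vldrbq_s32, which loads 8-bit elements and sign-extends them to 32 bits. A usage sketch assuming the <arm_mve.h> spellings:

  #include <arm_mve.h>

  int32x4_t
  load_bytes (const int8_t *base, mve_pred16_t p)
  {
    int32x4_t a = vldrbq_s32 (base);       /* PRED_none, sign-extending load */
    int32x4_t b = vldrbq_z_s32 (base, p);  /* PRED_z, false lanes zeroed */
    return vaddq_s32 (a, b);
  }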
+
+ /* Implements vctp8q, vctp16q, vctp32q and vctp64q intrinsics. */
+class vctpq_impl : public function_base
+{
+public:
+ CONSTEXPR vctpq_impl (machine_mode mode)
+ : m_mode (mode)
+ {}
+
+ /* Mode this intrinsic operates on. */
+ machine_mode m_mode;
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ insn_code code;
+ rtx target;
+
+ if (e.mode_suffix_id != MODE_none)
+ gcc_unreachable ();
+
+ switch (e.pred)
+ {
+ case PRED_none:
+ /* No predicate, no suffix. */
+ code = code_for_mve_vctpq (m_mode, m_mode);
+ target = e.use_exact_insn (code);
+ break;
+
+ case PRED_m:
+ /* No suffix, "m" predicate. */
+ code = code_for_mve_vctpq_m (m_mode, m_mode);
+ target = e.use_cond_insn (code, 0);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ rtx HItarget = gen_reg_rtx (HImode);
+ emit_move_insn (HItarget, gen_lowpart (HImode, target));
+ return HItarget;
+ }
+};
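vctpNq builds a tail predicate whose first n lanes are active, which is what the VxBI-to-HImode copy above hands back to the caller. A typical loop-tail sketch assuming the <arm_mve.h> spellings:

  #include <arm_mve.h>

  int32x4_t
  tail_load (const int32_t *base, uint32_t n)
  {
    mve_pred16_t p = vctp32q (n);     /* first n of the 4 lanes active */
    return vldrwq_z_s32 (base, p);    /* inactive lanes read as zero */
  }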
+
+ /* Implements vcvtq intrinsics. */
+class vcvtq_impl : public function_base
+{
+public:
+ rtx
+ expand (function_expander &e) const override
+ {
+ insn_code code;
+ machine_mode target_mode = e.vector_mode (0);
+ int unspec;
+ switch (e.pred)
+ {
+ case PRED_none:
+ switch (e.mode_suffix_id)
+ {
+ case MODE_none:
+ /* No predicate, no suffix. */
+ if (e.type_suffix (0).integer_p)
+ {
+ unspec = (e.type_suffix (0).unsigned_p
+ ? VCVTQ_FROM_F_U
+ : VCVTQ_FROM_F_S);
+ code = code_for_mve_q_from_f (unspec, unspec, target_mode);
+ }
+ else
+ {
+ unspec = (e.type_suffix (1).unsigned_p
+ ? VCVTQ_TO_F_U
+ : VCVTQ_TO_F_S);
+ code = code_for_mve_q_to_f (unspec, unspec, target_mode);
+ }
+ break;
+
+ case MODE_n:
+ /* No predicate, _n suffix. */
+ if (e.type_suffix (0).integer_p)
+ {
+ unspec = (e.type_suffix (0).unsigned_p
+ ? VCVTQ_N_FROM_F_U
+ : VCVTQ_N_FROM_F_S);
+ code = code_for_mve_q_n_from_f (unspec, unspec, target_mode);
+ }
+ else
+ {
+ unspec = (e.type_suffix (1).unsigned_p
+ ? VCVTQ_N_TO_F_U
+ : VCVTQ_N_TO_F_S);
+ code = code_for_mve_q_n_to_f (unspec, unspec, target_mode);
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return e.use_exact_insn (code);
+
+ case PRED_m:
+ case PRED_x:
+ switch (e.mode_suffix_id)
+ {
+ case MODE_none:
+ /* No suffix, "m" or "x" predicate. */
+ if (e.type_suffix (0).integer_p)
+ {
+ unspec = (e.type_suffix (0).unsigned_p
+ ? VCVTQ_M_FROM_F_U
+ : VCVTQ_M_FROM_F_S);
+ code = code_for_mve_q_m_from_f (unspec, unspec, target_mode);
+ }
+ else
+ {
+ unspec = (e.type_suffix (1).unsigned_p
+ ? VCVTQ_M_TO_F_U
+ : VCVTQ_M_TO_F_S);
+ code = code_for_mve_q_m_to_f (unspec, unspec, target_mode);
+ }
+ break;
+
+ case MODE_n:
+ /* _n suffix, "m" or "x" predicate. */
+ if (e.type_suffix (0).integer_p)
+ {
+ unspec = (e.type_suffix (0).unsigned_p
+ ? VCVTQ_M_N_FROM_F_U
+ : VCVTQ_M_N_FROM_F_S);
+ code = code_for_mve_q_m_n_from_f (unspec, unspec, target_mode);
+ }
+ else
+ {
+ unspec = (e.type_suffix (1).unsigned_p
+ ? VCVTQ_M_N_TO_F_U
+ : VCVTQ_M_N_TO_F_S);
+ code = code_for_mve_q_m_n_to_f (unspec, unspec, target_mode);
+ }
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ if (e.pred == PRED_m)
+ return e.use_cond_insn (code, 0);
+ else
+ return e.use_pred_x_insn (code);
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_unreachable ();
+ }
+};
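The four shapes handled above (with and without the _n fixed-point suffix, in both directions) map onto calls like the following. A sketch assuming the <arm_mve.h> spellings and MVE-FP support:

  #include <arm_mve.h>

  float32x4_t
  cvt_examples (int32x4_t si, float32x4_t sf)
  {
    float32x4_t a = vcvtq_f32_s32 (si);       /* int -> float, MODE_none */
    int32x4_t   b = vcvtq_s32_f32 (sf);       /* float -> int, MODE_none */
    float32x4_t c = vcvtq_n_f32_s32 (si, 8);  /* MODE_n: 8 fractional bits */
    int32x4_t   d = vcvtq_n_s32_f32 (sf, 8);
    return vaddq_f32 (vaddq_f32 (a, c), vcvtq_f32_s32 (vaddq_s32 (b, d)));
  }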
+
+ /* Implements vcvt[bt]q_f32_f16 and vcvt[bt]q_f16_f32
+ intrinsics. */
+class vcvtxq_impl : public function_base
+{
+public:
+ CONSTEXPR vcvtxq_impl (int unspec_f16_f32, int unspec_for_m_f16_f32,
+ int unspec_f32_f16, int unspec_for_m_f32_f16)
+ : m_unspec_f16_f32 (unspec_f16_f32),
+ m_unspec_for_m_f16_f32 (unspec_for_m_f16_f32),
+ m_unspec_f32_f16 (unspec_f32_f16),
+ m_unspec_for_m_f32_f16 (unspec_for_m_f32_f16)
+ {}
+
+ /* The unspec code associated with vcvt[bt]q. */
+ int m_unspec_f16_f32;
+ int m_unspec_for_m_f16_f32;
+ int m_unspec_f32_f16;
+ int m_unspec_for_m_f32_f16;
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ insn_code code;
+ switch (e.pred)
+ {
+ case PRED_none:
+ /* No predicate. */
+ if (e.type_suffix (0).element_bits == 16)
+ code = code_for_mve_q_f16_f32v8hf (m_unspec_f16_f32);
+ else
+ code = code_for_mve_q_f32_f16v4sf (m_unspec_f32_f16);
+ return e.use_exact_insn (code);
+
+ case PRED_m:
+ case PRED_x:
+ /* "m" or "x" predicate. */
+ if (e.type_suffix (0).element_bits == 16)
+ code = code_for_mve_q_m_f16_f32v8hf (m_unspec_for_m_f16_f32);
+ else
+ code = code_for_mve_q_m_f32_f16v4sf (m_unspec_for_m_f32_f16);
+
+ if (e.pred == PRED_m)
+ return e.use_cond_insn (code, 0);
+ else
+ return e.use_pred_x_insn (code);
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_unreachable ();
+ }
+};
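These map to the bottom/top half conversions between f16 and f32. A usage sketch assuming the <arm_mve.h> spellings:

  #include <arm_mve.h>

  float32x4_t
  widen_bottom (float16x8_t h)
  {
    return vcvtbq_f32_f16 (h);        /* even (bottom) f16 lanes -> f32 */
  }

  float16x8_t
  narrow_into_top (float16x8_t acc, float32x4_t s)
  {
    return vcvttq_f16_f32 (acc, s);   /* write odd (top) f16 lanes of acc */
  }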
+
+/* Map the vidup / vddup function directly to CODE (UNSPEC, M) where M is the
+ vector mode associated with type suffix 0. We need this special case
+   because in MODE_wb the builtins dereference the first parameter and update
+   its contents.  We also have to insert the two additional parameters needed
+   by the builtins compared to the intrinsics.  In wrapping mode, we have to
+   match the 'hack' to make sure the 'wrap' parameter is in an odd register.  */
+class viddup_impl : public function_base
+{
+public:
+ CONSTEXPR viddup_impl (bool inc_dec, bool wrap)
+ : m_inc_dec (inc_dec), m_wrap (wrap)
+ {}
+
+ /* Increment (true) or decrement (false). */
+ bool m_inc_dec;
+ /* v[id]wdup (true) or v[id]dup (false). */
+ bool m_wrap;
+
+ unsigned int
+ call_properties (const function_instance &fi) const override
+ {
+ if (fi.mode_suffix_id == MODE_wb)
+ return CP_WRITE_MEMORY | CP_READ_MEMORY;
+ else
+ return 0;
+ }
+
+ tree
+ memory_scalar_type (const function_instance &) const override
+ {
+ return get_typenode_from_name (UINT32_TYPE);
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ machine_mode mode = e.vector_mode (0);
+ insn_code code;
+ rtx insns, offset_ptr;
+ rtx new_offset;
+ int offset_arg_no;
+
+ if (! e.type_suffix (0).integer_p)
+ gcc_unreachable ();
+
+ if ((e.mode_suffix_id != MODE_n)
+ && (e.mode_suffix_id != MODE_wb))
+ gcc_unreachable ();
+
+ offset_arg_no = (e.pred == PRED_m) ? 1 : 0;
+
+ /* In _wb mode, the start offset is passed via a pointer,
+ dereference it. */
+ if (e.mode_suffix_id == MODE_wb)
+ {
+ rtx offset = gen_reg_rtx (SImode);
+ offset_ptr = e.args[offset_arg_no];
+ emit_insn (gen_rtx_SET (offset, gen_rtx_MEM (SImode, offset_ptr)));
+ e.args[offset_arg_no] = offset;
+ }
+
+ /* We have to shuffle parameters because the builtin needs additional
+ arguments:
+ - the updated "new_offset"
+ - total increment (incr * number of lanes) in the non-wrapping case
+ - hack to pass wrap in the top end of DImode operand so that it is
+       actually in an odd register */
+ new_offset = gen_reg_rtx (SImode);
+ e.args.quick_insert (offset_arg_no, new_offset);
+
+ if (m_wrap)
+ {
+ rtx wrap = gen_reg_rtx (DImode);
+ emit_insn (gen_rtx_SET (gen_rtx_SUBREG (SImode, wrap, 4),
+ e.args[offset_arg_no + 2]));
+ emit_insn (gen_rtx_SET (gen_rtx_SUBREG (SImode, wrap, 0),
+ GEN_INT (0)));
+ e.args[offset_arg_no + 2] = wrap;
+ }
+ else
+ {
+ rtx incr = e.args[offset_arg_no + 2];
+ rtx total_incr = gen_int_mode (INTVAL (incr)
+ * GET_MODE_NUNITS (e.vector_mode (0)),
+ SImode);
+ e.args.quick_push (total_incr);
+ }
+
+ /* _wb mode uses the _n builtins and adds code to update the
+ offset. */
+ switch (e.pred)
+ {
+ case PRED_none:
+ /* No predicate. */
+ code = m_wrap
+ ? (m_inc_dec
+ ? code_for_mve_q_wb_u_insn (VIWDUPQ, mode)
+ : code_for_mve_q_wb_u_insn (VDWDUPQ, mode))
+ : (m_inc_dec
+ ? code_for_mve_q_u_insn (VIDUPQ, mode)
+ : code_for_mve_q_u_insn (VDDUPQ, mode));
+ insns = e.use_exact_insn (code);
+ break;
+
+ case PRED_m:
+ case PRED_x:
+ /* "m" or "x" predicate. */
+ code = m_wrap
+ ? (m_inc_dec
+ ? code_for_mve_q_m_wb_u_insn (VIWDUPQ_M, mode)
+ : code_for_mve_q_m_wb_u_insn (VDWDUPQ_M, mode))
+ : (m_inc_dec
+ ? code_for_mve_q_m_wb_u_insn (VIDUPQ_M, mode)
+ : code_for_mve_q_m_wb_u_insn (VDDUPQ_M, mode));
+
+ if (e.pred == PRED_m)
+ insns = e.use_cond_insn (code, 0);
+ else
+ insns = e.use_pred_x_insn (code);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Update offset as appropriate. */
+ if (e.mode_suffix_id == MODE_wb)
+ emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, offset_ptr), new_offset));
+
+ return insns;
+ }
+};
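For reference, the intrinsics being expanded generate lane sequences start, start+imm, ... (or the decrementing/wrapping variants), and the _wb forms write the updated start back through the pointer, which is why the expander dereferences and re-stores the first argument. A usage sketch assuming the <arm_mve.h> spellings (the immediate must be 1, 2, 4 or 8):

  #include <arm_mve.h>

  uint32x4_t
  iota_examples (uint32_t *start, uint32_t wrap)
  {
    uint32x4_t a = vidupq_n_u32 (*start, 4);        /* {s, s+4, s+8, s+12}, no write-back */
    uint32x4_t b = vidupq_wb_u32 (start, 4);        /* same lanes, also updates *start */
    uint32x4_t c = vdwdupq_wb_u32 (start, wrap, 4); /* decrementing, wrapping at 'wrap' */
    return vaddq_u32 (vaddq_u32 (a, b), c);
  }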
+
+/* Map the vshlc function directly to CODE (UNSPEC, M) where M is the vector
+ mode associated with type suffix 0. We need this special case because the
+   intrinsics dereference the second parameter and update its contents. */
+class vshlc_impl : public function_base
+{
+public:
+ unsigned int
+ call_properties (const function_instance &) const override
+ {
+ return CP_WRITE_MEMORY | CP_READ_MEMORY;
+ }
+
+ tree
+ memory_scalar_type (const function_instance &) const override
+ {
+ return get_typenode_from_name (UINT32_TYPE);
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ machine_mode mode = e.vector_mode (0);
+ insn_code code;
+ rtx insns, carry_ptr, carry, new_carry;
+ int carry_arg_no;
+
+ if (! e.type_suffix (0).integer_p)
+ gcc_unreachable ();
+
+ if (e.mode_suffix_id != MODE_none)
+ gcc_unreachable ();
+
+ carry_arg_no = 1;
+
+ carry = gen_reg_rtx (SImode);
+ carry_ptr = e.args[carry_arg_no];
+ emit_insn (gen_rtx_SET (carry, gen_rtx_MEM (SImode, carry_ptr)));
+ e.args[carry_arg_no] = carry;
+
+ new_carry = gen_reg_rtx (SImode);
+ e.args.quick_insert (0, new_carry);
+
+ switch (e.pred)
+ {
+ case PRED_none:
+ /* No predicate. */
+ code = e.type_suffix (0).unsigned_p
+ ? code_for_mve_vshlcq (VSHLCQ_U, mode)
+ : code_for_mve_vshlcq (VSHLCQ_S, mode);
+ insns = e.use_exact_insn (code);
+ break;
+
+ case PRED_m:
+ /* "m" predicate. */
+ code = e.type_suffix (0).unsigned_p
+ ? code_for_mve_vshlcq_m (VSHLCQ_M_U, mode)
+ : code_for_mve_vshlcq_m (VSHLCQ_M_S, mode);
+ insns = e.use_cond_insn (code, 0);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Update carry. */
+ emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, carry_ptr), new_carry));
+
+ return insns;
+ }
+};
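vshlcq shifts the whole vector left by the immediate, taking the shifted-in bits from *carry and writing the shifted-out bits back to it, which is why the expander both reads and writes memory. A usage sketch assuming the <arm_mve.h> spelling:

  #include <arm_mve.h>

  uint32x4_t
  shift_with_carry (uint32x4_t v, uint32_t *carry)
  {
    return vshlcq_u32 (v, carry, 1);  /* *carry supplies and receives the shifted bits */
  }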
+
+/* Map the vadc and similar functions directly to CODE (UNSPEC, UNSPEC). Take
+ care of the implicit carry argument. */
+class vadc_vsbc_impl : public function_base
+{
+public:
+ CONSTEXPR vadc_vsbc_impl (bool init_carry, bool add)
+ : m_init_carry (init_carry), m_add (add)
+ {}
+
+ /* Initialize carry with 0 (vadci). */
+ bool m_init_carry;
+ /* Add (true) or Sub (false). */
+ bool m_add;
+
+ unsigned int
+ call_properties (const function_instance &) const override
+ {
+ unsigned int flags = CP_WRITE_MEMORY | CP_READ_FPCR;
+ if (!m_init_carry)
+ flags |= CP_READ_MEMORY;
+ return flags;
+ }
+
+ tree
+ memory_scalar_type (const function_instance &) const override
+ {
+ /* carry is "unsigned int". */
+ return get_typenode_from_name ("unsigned int");
+ }
+
+ rtx
+ expand (function_expander &e) const override
+ {
+ insn_code code;
+ rtx insns, carry_ptr, carry_out;
+ int carry_out_arg_no;
+ int unspec;
+
+ if (! e.type_suffix (0).integer_p)
+ gcc_unreachable ();
+
+ if (e.mode_suffix_id != MODE_none)
+ gcc_unreachable ();
+
+ /* Remove carry from arguments, it is implicit for the builtin. */
+ switch (e.pred)
+ {
+ case PRED_none:
+ carry_out_arg_no = 2;
+ break;
+
+ case PRED_m:
+ carry_out_arg_no = 3;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ carry_ptr = e.args[carry_out_arg_no];
+ e.args.ordered_remove (carry_out_arg_no);
+
+ if (!m_init_carry)
+ {
+ /* Prepare carry in:
+ set_fpscr ( (fpscr & ~0x20000000u)
+ | ((*carry & 1u) << 29) ) */
+ rtx carry_in = gen_reg_rtx (SImode);
+ rtx fpscr = gen_reg_rtx (SImode);
+ emit_insn (gen_get_fpscr_nzcvqc (fpscr));
+ emit_insn (gen_rtx_SET (carry_in, gen_rtx_MEM (SImode, carry_ptr)));
+
+ emit_insn (gen_rtx_SET (carry_in,
+ gen_rtx_ASHIFT (SImode,
+ carry_in,
+ GEN_INT (29))));
+ emit_insn (gen_rtx_SET (carry_in,
+ gen_rtx_AND (SImode,
+ carry_in,
+ GEN_INT (0x20000000))));
+ emit_insn (gen_rtx_SET (fpscr,
+ gen_rtx_AND (SImode,
+ fpscr,
+ GEN_INT (~0x20000000))));
+ emit_insn (gen_rtx_SET (carry_in,
+ gen_rtx_IOR (SImode,
+ carry_in,
+ fpscr)));
+ emit_insn (gen_set_fpscr_nzcvqc (carry_in));
+ }
+
+ switch (e.pred)
+ {
+ case PRED_none:
+ /* No predicate. */
+ unspec = m_add
+ ? (m_init_carry
+ ? (e.type_suffix (0).unsigned_p
+ ? VADCIQ_U
+ : VADCIQ_S)
+ : (e.type_suffix (0).unsigned_p
+ ? VADCQ_U
+ : VADCQ_S))
+ : (m_init_carry
+ ? (e.type_suffix (0).unsigned_p
+ ? VSBCIQ_U
+ : VSBCIQ_S)
+ : (e.type_suffix (0).unsigned_p
+ ? VSBCQ_U
+ : VSBCQ_S));
+ code = code_for_mve_q_v4si (unspec, unspec);
+ insns = e.use_exact_insn (code);
+ break;
+
+ case PRED_m:
+ /* "m" predicate. */
+ unspec = m_add
+ ? (m_init_carry
+ ? (e.type_suffix (0).unsigned_p
+ ? VADCIQ_M_U
+ : VADCIQ_M_S)
+ : (e.type_suffix (0).unsigned_p
+ ? VADCQ_M_U
+ : VADCQ_M_S))
+ : (m_init_carry
+ ? (e.type_suffix (0).unsigned_p
+ ? VSBCIQ_M_U
+ : VSBCIQ_M_S)
+ : (e.type_suffix (0).unsigned_p
+ ? VSBCQ_M_U
+ : VSBCQ_M_S));
+ code = code_for_mve_q_m_v4si (unspec, unspec);
+ insns = e.use_cond_insn (code, 0);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Update carry_out. */
+ carry_out = gen_reg_rtx (SImode);
+ emit_insn (gen_get_fpscr_nzcvqc (carry_out));
+ emit_insn (gen_rtx_SET (carry_out,
+ gen_rtx_LSHIFTRT (SImode,
+ carry_out,
+ GEN_INT (29))));
+ emit_insn (gen_rtx_SET (carry_out,
+ gen_rtx_AND (SImode,
+ carry_out,
+ GEN_INT (1))));
+ emit_insn (gen_rtx_SET (gen_rtx_MEM (Pmode, carry_ptr), carry_out));
+
+ return insns;
+ }
+};
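The implicit-carry handling above corresponds to the way the intrinsics are used: vadciq starts with the carry cleared, while vadcq reads the carry from the user's variable (mirrored into FPSCR bit 29), and both write the carry-out back. A usage sketch assuming the <arm_mve.h> spellings:

  #include <arm_mve.h>

  uint32x4_t
  adc_examples (uint32x4_t a, uint32x4_t b)
  {
    unsigned carry;
    uint32x4_t r0 = vadciq_u32 (a, b, &carry);  /* carry-in forced to 0, carry-out stored */
    uint32x4_t r1 = vadcq_u32 (a, b, &carry);   /* carry-in taken from 'carry', then updated */
    return vaddq_u32 (r0, r1);
  }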
+
} /* end anonymous namespace */
namespace arm_mve {
@@ -309,12 +1027,15 @@ namespace arm_mve {
FUNCTION_PRED_P_S_U (vabavq, VABAVQ)
FUNCTION_WITHOUT_N (vabdq, VABDQ)
FUNCTION (vabsq, unspec_based_mve_function_exact_insn, (ABS, ABS, ABS, -1, -1, -1, VABSQ_M_S, -1, VABSQ_M_F, -1, -1, -1))
+FUNCTION (vadciq, vadc_vsbc_impl, (true, true))
+FUNCTION (vadcq, vadc_vsbc_impl, (false, true))
FUNCTION_WITH_RTX_M_N (vaddq, PLUS, VADDQ)
FUNCTION_PRED_P_S_U (vaddlvaq, VADDLVAQ)
FUNCTION_PRED_P_S_U (vaddlvq, VADDLVQ)
FUNCTION_PRED_P_S_U (vaddvq, VADDVQ)
FUNCTION_PRED_P_S_U (vaddvaq, VADDVAQ)
FUNCTION_WITH_RTX_M (vandq, AND, VANDQ)
+FUNCTION (vbicq, unspec_based_mve_function_exact_insn_vbic, (VBICQ_N_S, VBICQ_N_U, VBICQ_M_S, VBICQ_M_U, VBICQ_M_F, VBICQ_M_N_S, VBICQ_M_N_U))
FUNCTION_ONLY_N (vbrsrq, VBRSRQ)
FUNCTION (vcaddq_rot90, unspec_mve_function_exact_insn_rot, (UNSPEC_VCADD90, UNSPEC_VCADD90, UNSPEC_VCADD90, VCADDQ_ROT90_M, VCADDQ_ROT90_M, VCADDQ_ROT90_M_F))
FUNCTION (vcaddq_rot270, unspec_mve_function_exact_insn_rot, (UNSPEC_VCADD270, UNSPEC_VCADD270, UNSPEC_VCADD270, VCADDQ_ROT270_M, VCADDQ_ROT270_M, VCADDQ_ROT270_M_F))
@@ -339,7 +1060,22 @@ FUNCTION (vcmpltq, unspec_based_mve_function_exact_insn_vcmp, (LT, UNKNOWN, LT,
FUNCTION (vcmpcsq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GEU, UNKNOWN, UNKNOWN, VCMPCSQ_M_U, UNKNOWN, UNKNOWN, VCMPCSQ_M_N_U, UNKNOWN))
FUNCTION (vcmphiq, unspec_based_mve_function_exact_insn_vcmp, (UNKNOWN, GTU, UNKNOWN, UNKNOWN, VCMPHIQ_M_U, UNKNOWN, UNKNOWN, VCMPHIQ_M_N_U, UNKNOWN))
FUNCTION_WITHOUT_M_N (vcreateq, VCREATEQ)
-FUNCTION_ONLY_N (vdupq, VDUPQ)
+FUNCTION (vctp8q, vctpq_impl, (V16BImode))
+FUNCTION (vctp16q, vctpq_impl, (V8BImode))
+FUNCTION (vctp32q, vctpq_impl, (V4BImode))
+FUNCTION (vctp64q, vctpq_impl, (V2QImode))
+FUNCTION_WITHOUT_N_NO_F (vcvtaq, VCVTAQ)
+FUNCTION (vcvtbq, vcvtxq_impl, (VCVTBQ_F16_F32, VCVTBQ_M_F16_F32, VCVTBQ_F32_F16, VCVTBQ_M_F32_F16))
+FUNCTION (vcvtq, vcvtq_impl,)
+FUNCTION_WITHOUT_N_NO_F (vcvtmq, VCVTMQ)
+FUNCTION_WITHOUT_N_NO_F (vcvtnq, VCVTNQ)
+FUNCTION_WITHOUT_N_NO_F (vcvtpq, VCVTPQ)
+FUNCTION (vcvttq, vcvtxq_impl, (VCVTTQ_F16_F32, VCVTTQ_M_F16_F32, VCVTTQ_F32_F16, VCVTTQ_M_F32_F16))
+FUNCTION (vddupq, viddup_impl, (false, false))
+FUNCTION (vdupq, vdupq_impl, (VDUPQ_M_N_S, VDUPQ_M_N_U, VDUPQ_M_N_F))
+FUNCTION (vdwdupq, viddup_impl, (false, true))
+FUNCTION (vidupq, viddup_impl, (true, false))
+FUNCTION (viwdupq, viddup_impl, (true, true))
FUNCTION_WITH_RTX_M (veorq, XOR, VEORQ)
FUNCTION (vfmaq, unspec_mve_function_exact_insn, (-1, -1, VFMAQ_F, -1, -1, VFMAQ_N_F, -1, -1, VFMAQ_M_F, -1, -1, VFMAQ_M_N_F))
FUNCTION (vfmasq, unspec_mve_function_exact_insn, (-1, -1, -1, -1, -1, VFMASQ_N_F, -1, -1, -1, -1, -1, VFMASQ_M_N_F))
@@ -347,6 +1083,9 @@ FUNCTION (vfmsq, unspec_mve_function_exact_insn, (-1, -1, VFMSQ_F, -1, -1, -1, -
FUNCTION_WITH_M_N_NO_F (vhaddq, VHADDQ)
FUNCTION_WITH_M_N_NO_F (vhsubq, VHSUBQ)
FUNCTION (vld1q, vld1_impl,)
+FUNCTION (vldrbq, vldrq_impl, (TYPE_SUFFIX_s8, TYPE_SUFFIX_u8))
+FUNCTION (vldrhq, vldrq_impl, (TYPE_SUFFIX_s16, TYPE_SUFFIX_u16, TYPE_SUFFIX_f16))
+FUNCTION (vldrwq, vldrq_impl, (TYPE_SUFFIX_s32, TYPE_SUFFIX_u32, TYPE_SUFFIX_f32))
FUNCTION_PRED_P_S (vmaxavq, VMAXAVQ)
FUNCTION_WITHOUT_N_NO_U_F (vmaxaq, VMAXAQ)
FUNCTION_ONLY_F (vmaxnmaq, VMAXNMAQ)
@@ -394,6 +1133,7 @@ FUNCTION_WITH_RTX_M_N (vmulq, MULT, VMULQ)
FUNCTION_WITH_RTX_M_N_NO_F (vmvnq, NOT, VMVNQ)
FUNCTION (vnegq, unspec_based_mve_function_exact_insn, (NEG, NEG, NEG, -1, -1, -1, VNEGQ_M_S, -1, VNEGQ_M_F, -1, -1, -1))
FUNCTION_WITHOUT_M_N (vpselq, VPSELQ)
+FUNCTION (vornq, unspec_based_mve_function_exact_insn_vorn, (-1, -1, VORNQ_M_S, VORNQ_M_U, VORNQ_M_F, -1, -1))
FUNCTION_WITH_RTX_M_N_NO_N_F (vorrq, IOR, VORRQ)
FUNCTION_WITHOUT_N_NO_U_F (vqabsq, VQABSQ)
FUNCTION_WITH_M_N_NO_F (vqaddq, VQADDQ)
@@ -454,6 +1194,9 @@ FUNCTION_WITH_M_N_NO_F (vrshlq, VRSHLQ)
FUNCTION_ONLY_N_NO_F (vrshrnbq, VRSHRNBQ)
FUNCTION_ONLY_N_NO_F (vrshrntq, VRSHRNTQ)
FUNCTION_ONLY_N_NO_F (vrshrq, VRSHRQ)
+FUNCTION (vsbciq, vadc_vsbc_impl, (true, false))
+FUNCTION (vsbcq, vadc_vsbc_impl, (false, false))
+FUNCTION (vshlcq, vshlc_impl,)
FUNCTION_ONLY_N_NO_F (vshllbq, VSHLLBQ)
FUNCTION_ONLY_N_NO_F (vshlltq, VSHLLTQ)
FUNCTION_WITH_M_N_R (vshlq, VSHLQ)
@@ -463,6 +1206,9 @@ FUNCTION_ONLY_N_NO_F (vshrq, VSHRQ)
FUNCTION_ONLY_N_NO_F (vsliq, VSLIQ)
FUNCTION_ONLY_N_NO_F (vsriq, VSRIQ)
FUNCTION (vst1q, vst1_impl,)
+FUNCTION (vstrbq, vstrq_impl, (QImode, opt_scalar_mode ()))
+FUNCTION (vstrhq, vstrq_impl, (HImode, HFmode))
+FUNCTION (vstrwq, vstrq_impl, (SImode, SFmode))
FUNCTION_WITH_RTX_M_N (vsubq, MINUS, VSUBQ)
FUNCTION (vuninitializedq, vuninitializedq_impl,)
diff --git a/gcc/config/arm/arm-mve-builtins-base.def b/gcc/config/arm/arm-mve-builtins-base.def
index 90d031e..6166f1b 100644
--- a/gcc/config/arm/arm-mve-builtins-base.def
+++ b/gcc/config/arm/arm-mve-builtins-base.def
@@ -21,15 +21,18 @@
DEF_MVE_FUNCTION (vabavq, binary_acca_int32, all_integer, p_or_none)
DEF_MVE_FUNCTION (vabdq, binary, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vabsq, unary, all_signed, mx_or_none)
+DEF_MVE_FUNCTION (vadciq, vadc_vsbc, integer_32, m_or_none)
+DEF_MVE_FUNCTION (vadcq, vadc_vsbc, integer_32, m_or_none)
DEF_MVE_FUNCTION (vaddlvaq, unary_widen_acc, integer_32, p_or_none)
DEF_MVE_FUNCTION (vaddlvq, unary_acc, integer_32, p_or_none)
DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vaddvaq, unary_int32_acc, all_integer, p_or_none)
DEF_MVE_FUNCTION (vaddvq, unary_int32, all_integer, p_or_none)
DEF_MVE_FUNCTION (vandq, binary, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vbicq, binary_orrq, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vbrsrq, binary_imm32, all_integer, mx_or_none)
-DEF_MVE_FUNCTION (vcaddq_rot90, binary, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vcaddq_rot270, binary, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vcaddq_rot90, binary, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vclsq, unary, all_signed, mx_or_none)
DEF_MVE_FUNCTION (vclzq, unary, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vcmpcsq, cmp, all_unsigned, m_or_none)
@@ -41,13 +44,24 @@ DEF_MVE_FUNCTION (vcmpleq, cmp, all_signed, m_or_none)
DEF_MVE_FUNCTION (vcmpltq, cmp, all_signed, m_or_none)
DEF_MVE_FUNCTION (vcmpneq, cmp, all_integer, m_or_none)
DEF_MVE_FUNCTION (vcreateq, create, all_integer_with_64, none)
+DEF_MVE_FUNCTION (vctp16q, vctp, none, m_or_none)
+DEF_MVE_FUNCTION (vctp32q, vctp, none, m_or_none)
+DEF_MVE_FUNCTION (vctp64q, vctp, none, m_or_none)
+DEF_MVE_FUNCTION (vctp8q, vctp, none, m_or_none)
+DEF_MVE_FUNCTION (vddupq, viddup, all_unsigned, mx_or_none)
DEF_MVE_FUNCTION (vdupq, unary_n, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vdwdupq, vidwdup, all_unsigned, mx_or_none)
DEF_MVE_FUNCTION (veorq, binary, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vhaddq, binary_opt_n, all_integer, mx_or_none)
-DEF_MVE_FUNCTION (vhcaddq_rot90, binary, all_signed, mx_or_none)
DEF_MVE_FUNCTION (vhcaddq_rot270, binary, all_signed, mx_or_none)
+DEF_MVE_FUNCTION (vhcaddq_rot90, binary, all_signed, mx_or_none)
DEF_MVE_FUNCTION (vhsubq, binary_opt_n, all_integer, mx_or_none)
-DEF_MVE_FUNCTION (vld1q, load, all_integer, none)
+DEF_MVE_FUNCTION (vidupq, viddup, all_unsigned, mx_or_none)
+DEF_MVE_FUNCTION (viwdupq, vidwdup, all_unsigned, mx_or_none)
+DEF_MVE_FUNCTION (vld1q, load, all_integer, z_or_none)
+DEF_MVE_FUNCTION (vldrbq, load_ext, all_integer, z_or_none)
+DEF_MVE_FUNCTION (vldrhq, load_ext, integer_16_32, z_or_none)
+DEF_MVE_FUNCTION (vldrwq, load_ext, integer_32, z_or_none)
DEF_MVE_FUNCTION (vmaxaq, binary_maxamina, all_signed, m_or_none)
DEF_MVE_FUNCTION (vmaxavq, binary_maxavminav, all_signed, p_or_none)
DEF_MVE_FUNCTION (vmaxq, binary, all_integer, mx_or_none)
@@ -80,12 +94,13 @@ DEF_MVE_FUNCTION (vmovnbq, binary_move_narrow, integer_16_32, m_or_none)
DEF_MVE_FUNCTION (vmovntq, binary_move_narrow, integer_16_32, m_or_none)
DEF_MVE_FUNCTION (vmulhq, binary, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vmullbq_int, binary_widen, all_integer, mx_or_none)
-DEF_MVE_FUNCTION (vmulltq_int, binary_widen, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vmullbq_poly, binary_widen_poly, poly_8_16, mx_or_none)
+DEF_MVE_FUNCTION (vmulltq_int, binary_widen, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vmulltq_poly, binary_widen_poly, poly_8_16, mx_or_none)
DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vmvnq, mvn, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vnegq, unary, all_signed, mx_or_none)
+DEF_MVE_FUNCTION (vornq, binary_orrq, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vorrq, binary_orrq, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vpselq, vpsel, all_integer_with_64, none)
DEF_MVE_FUNCTION (vqabsq, unary, all_signed, m_or_none)
@@ -142,6 +157,9 @@ DEF_MVE_FUNCTION (vrshlq, binary_round_lshift, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vrshrnbq, binary_rshift_narrow, integer_16_32, m_or_none)
DEF_MVE_FUNCTION (vrshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
DEF_MVE_FUNCTION (vrshrq, binary_rshift, all_integer, mx_or_none)
+DEF_MVE_FUNCTION (vsbciq, vadc_vsbc, integer_32, m_or_none)
+DEF_MVE_FUNCTION (vsbcq, vadc_vsbc, integer_32, m_or_none)
+DEF_MVE_FUNCTION (vshlcq, vshlc, all_integer, m_or_none)
DEF_MVE_FUNCTION (vshllbq, binary_widen_n, integer_8_16, mx_or_none)
DEF_MVE_FUNCTION (vshlltq, binary_widen_n, integer_8_16, mx_or_none)
DEF_MVE_FUNCTION (vshlq, binary_lshift, all_integer, mx_or_none)
@@ -151,7 +169,10 @@ DEF_MVE_FUNCTION (vshrntq, binary_rshift_narrow, integer_16_32, m_or_none)
DEF_MVE_FUNCTION (vshrq, binary_rshift, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vsliq, ternary_lshift, all_integer, m_or_none)
DEF_MVE_FUNCTION (vsriq, ternary_rshift, all_integer, m_or_none)
-DEF_MVE_FUNCTION (vst1q, store, all_integer, none)
+DEF_MVE_FUNCTION (vst1q, store, all_integer, p_or_none)
+DEF_MVE_FUNCTION (vstrbq, store, all_integer, p_or_none)
+DEF_MVE_FUNCTION (vstrhq, store, integer_16_32, p_or_none)
+DEF_MVE_FUNCTION (vstrwq, store, integer_32, p_or_none)
DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_integer, mx_or_none)
DEF_MVE_FUNCTION (vuninitializedq, inherent, all_integer_with_64, none)
#undef REQUIRES_FLOAT
@@ -161,30 +182,42 @@ DEF_MVE_FUNCTION (vabdq, binary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vabsq, unary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vaddq, binary_opt_n, all_float, mx_or_none)
DEF_MVE_FUNCTION (vandq, binary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vbicq, binary_orrq, all_float, mx_or_none)
DEF_MVE_FUNCTION (vbrsrq, binary_imm32, all_float, mx_or_none)
-DEF_MVE_FUNCTION (vcaddq_rot90, binary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vcaddq_rot270, binary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vcaddq_rot90, binary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vcmlaq, ternary, all_float, m_or_none)
-DEF_MVE_FUNCTION (vcmlaq_rot90, ternary, all_float, m_or_none)
DEF_MVE_FUNCTION (vcmlaq_rot180, ternary, all_float, m_or_none)
DEF_MVE_FUNCTION (vcmlaq_rot270, ternary, all_float, m_or_none)
-DEF_MVE_FUNCTION (vcmulq, binary, all_float, mx_or_none)
-DEF_MVE_FUNCTION (vcmulq_rot90, binary, all_float, mx_or_none)
-DEF_MVE_FUNCTION (vcmulq_rot180, binary, all_float, mx_or_none)
-DEF_MVE_FUNCTION (vcmulq_rot270, binary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vcmlaq_rot90, ternary, all_float, m_or_none)
DEF_MVE_FUNCTION (vcmpeqq, cmp, all_float, m_or_none)
DEF_MVE_FUNCTION (vcmpgeq, cmp, all_float, m_or_none)
DEF_MVE_FUNCTION (vcmpgtq, cmp, all_float, m_or_none)
DEF_MVE_FUNCTION (vcmpleq, cmp, all_float, m_or_none)
DEF_MVE_FUNCTION (vcmpltq, cmp, all_float, m_or_none)
DEF_MVE_FUNCTION (vcmpneq, cmp, all_float, m_or_none)
+DEF_MVE_FUNCTION (vcmulq, binary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vcmulq_rot180, binary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vcmulq_rot270, binary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vcmulq_rot90, binary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vcreateq, create, all_float, none)
+DEF_MVE_FUNCTION (vcvtaq, vcvtx, cvtx, mx_or_none)
+DEF_MVE_FUNCTION (vcvtbq, vcvt_f16_f32, cvt_f16_f32, mx_or_none)
+DEF_MVE_FUNCTION (vcvtbq, vcvt_f32_f16, cvt_f32_f16, mx_or_none)
+DEF_MVE_FUNCTION (vcvtmq, vcvtx, cvtx, mx_or_none)
+DEF_MVE_FUNCTION (vcvtnq, vcvtx, cvtx, mx_or_none)
+DEF_MVE_FUNCTION (vcvtpq, vcvtx, cvtx, mx_or_none)
+DEF_MVE_FUNCTION (vcvtq, vcvt, cvt, mx_or_none)
+DEF_MVE_FUNCTION (vcvttq, vcvt_f16_f32, cvt_f16_f32, mx_or_none)
+DEF_MVE_FUNCTION (vcvttq, vcvt_f32_f16, cvt_f32_f16, mx_or_none)
DEF_MVE_FUNCTION (vdupq, unary_n, all_float, mx_or_none)
DEF_MVE_FUNCTION (veorq, binary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vfmaq, ternary_opt_n, all_float, m_or_none)
DEF_MVE_FUNCTION (vfmasq, ternary_n, all_float, m_or_none)
DEF_MVE_FUNCTION (vfmsq, ternary, all_float, m_or_none)
-DEF_MVE_FUNCTION (vld1q, load, all_float, none)
+DEF_MVE_FUNCTION (vld1q, load, all_float, z_or_none)
+DEF_MVE_FUNCTION (vldrhq, load_ext, float_16, z_or_none)
+DEF_MVE_FUNCTION (vldrwq, load_ext, float_32, z_or_none)
DEF_MVE_FUNCTION (vmaxnmaq, binary, all_float, m_or_none)
DEF_MVE_FUNCTION (vmaxnmavq, binary_maxvminv, all_float, p_or_none)
DEF_MVE_FUNCTION (vmaxnmq, binary, all_float, mx_or_none)
@@ -195,10 +228,11 @@ DEF_MVE_FUNCTION (vminnmq, binary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vminnmvq, binary_maxvminv, all_float, p_or_none)
DEF_MVE_FUNCTION (vmulq, binary_opt_n, all_float, mx_or_none)
DEF_MVE_FUNCTION (vnegq, unary, all_float, mx_or_none)
+DEF_MVE_FUNCTION (vornq, binary_orrq, all_float, mx_or_none)
DEF_MVE_FUNCTION (vorrq, binary_orrq, all_float, mx_or_none)
DEF_MVE_FUNCTION (vpselq, vpsel, all_float, none)
DEF_MVE_FUNCTION (vreinterpretq, unary_convert, reinterpret_float, none)
-DEF_MVE_FUNCTION (vrev32q, unary, float16, mx_or_none)
+DEF_MVE_FUNCTION (vrev32q, unary, float_16, mx_or_none)
DEF_MVE_FUNCTION (vrev64q, unary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vrndaq, unary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vrndmq, unary, all_float, mx_or_none)
@@ -206,7 +240,9 @@ DEF_MVE_FUNCTION (vrndnq, unary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vrndpq, unary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vrndq, unary, all_float, mx_or_none)
DEF_MVE_FUNCTION (vrndxq, unary, all_float, mx_or_none)
-DEF_MVE_FUNCTION (vst1q, store, all_float, none)
+DEF_MVE_FUNCTION (vst1q, store, all_float, p_or_none)
+DEF_MVE_FUNCTION (vstrhq, store, float_16, p_or_none)
+DEF_MVE_FUNCTION (vstrwq, store, float_32, p_or_none)
DEF_MVE_FUNCTION (vsubq, binary_opt_n, all_float, mx_or_none)
DEF_MVE_FUNCTION (vuninitializedq, inherent, all_float, none)
#undef REQUIRES_FLOAT
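
Note: switching vld1q/vst1q (and the new vldr*/vstr* entries) to
z_or_none / p_or_none is what exposes the zero-predicated load and
predicated store forms.  A minimal sketch using the standard arm_mve.h
API (the helper function is illustrative only):

    #include <arm_mve.h>

    void copy_tail_s8 (int8_t *dst, const int8_t *src, uint32_t n)
    {
      mve_pred16_t p = vctp8q (n);        /* predicate for lanes 0..n-1  */
      int8x16_t v = vld1q_z_s8 (src, p);  /* _z: inactive lanes read as 0  */
      vst1q_p_s8 (dst, v, p);             /* _p: only active lanes stored  */
    }
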
diff --git a/gcc/config/arm/arm-mve-builtins-base.h b/gcc/config/arm/arm-mve-builtins-base.h
index c9b52a8..7c866d8 100644
--- a/gcc/config/arm/arm-mve-builtins-base.h
+++ b/gcc/config/arm/arm-mve-builtins-base.h
@@ -26,12 +26,15 @@ namespace functions {
extern const function_base *const vabavq;
extern const function_base *const vabdq;
extern const function_base *const vabsq;
+extern const function_base *const vadciq;
+extern const function_base *const vadcq;
extern const function_base *const vaddlvaq;
extern const function_base *const vaddlvq;
extern const function_base *const vaddq;
extern const function_base *const vaddvaq;
extern const function_base *const vaddvq;
extern const function_base *const vandq;
+extern const function_base *const vbicq;
extern const function_base *const vbrsrq;
extern const function_base *const vcaddq_rot270;
extern const function_base *const vcaddq_rot90;
@@ -54,7 +57,20 @@ extern const function_base *const vcmulq_rot180;
extern const function_base *const vcmulq_rot270;
extern const function_base *const vcmulq_rot90;
extern const function_base *const vcreateq;
+extern const function_base *const vctp16q;
+extern const function_base *const vctp32q;
+extern const function_base *const vctp64q;
+extern const function_base *const vctp8q;
+extern const function_base *const vcvtaq;
+extern const function_base *const vcvtbq;
+extern const function_base *const vcvtmq;
+extern const function_base *const vcvtnq;
+extern const function_base *const vcvtpq;
+extern const function_base *const vcvtq;
+extern const function_base *const vcvttq;
+extern const function_base *const vddupq;
extern const function_base *const vdupq;
+extern const function_base *const vdwdupq;
extern const function_base *const veorq;
extern const function_base *const vfmaq;
extern const function_base *const vfmasq;
@@ -63,7 +79,12 @@ extern const function_base *const vhaddq;
extern const function_base *const vhcaddq_rot270;
extern const function_base *const vhcaddq_rot90;
extern const function_base *const vhsubq;
+extern const function_base *const vidupq;
+extern const function_base *const viwdupq;
extern const function_base *const vld1q;
+extern const function_base *const vldrbq;
+extern const function_base *const vldrhq;
+extern const function_base *const vldrwq;
extern const function_base *const vmaxaq;
extern const function_base *const vmaxavq;
extern const function_base *const vmaxnmaq;
@@ -110,6 +131,7 @@ extern const function_base *const vmulltq_poly;
extern const function_base *const vmulq;
extern const function_base *const vmvnq;
extern const function_base *const vnegq;
+extern const function_base *const vornq;
extern const function_base *const vorrq;
extern const function_base *const vpselq;
extern const function_base *const vqabsq;
@@ -171,6 +193,9 @@ extern const function_base *const vrshlq;
extern const function_base *const vrshrnbq;
extern const function_base *const vrshrntq;
extern const function_base *const vrshrq;
+extern const function_base *const vsbciq;
+extern const function_base *const vsbcq;
+extern const function_base *const vshlcq;
extern const function_base *const vshllbq;
extern const function_base *const vshlltq;
extern const function_base *const vshlq;
@@ -180,6 +205,9 @@ extern const function_base *const vshrq;
extern const function_base *const vsliq;
extern const function_base *const vsriq;
extern const function_base *const vst1q;
+extern const function_base *const vstrbq;
+extern const function_base *const vstrhq;
+extern const function_base *const vstrwq;
extern const function_base *const vsubq;
extern const function_base *const vuninitializedq;
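
Note: vadciq/vadcq and vsbciq/vsbcq declared above model the
carry-chaining add/subtract instructions.  A rough sketch of how the
carry is threaded through the user-level intrinsics, assuming the
standard arm_mve.h API (the wrapper is illustrative only):

    #include <arm_mve.h>

    /* 256-bit addition: vadciq starts the chain with carry-in = 0,
       vadcq consumes and updates the carry through the pointer.  */
    void add256 (uint32x4_t *lo, uint32x4_t *hi, uint32x4_t alo, uint32x4_t ahi)
    {
      unsigned carry;
      *lo = vadciq_u32 (*lo, alo, &carry);
      *hi = vadcq_u32 (*hi, ahi, &carry);
    }
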
diff --git a/gcc/config/arm/arm-mve-builtins-functions.h b/gcc/config/arm/arm-mve-builtins-functions.h
index ac2a731..0ade215 100644
--- a/gcc/config/arm/arm-mve-builtins-functions.h
+++ b/gcc/config/arm/arm-mve-builtins-functions.h
@@ -20,6 +20,8 @@
#ifndef GCC_ARM_MVE_BUILTINS_FUNCTIONS_H
#define GCC_ARM_MVE_BUILTINS_FUNCTIONS_H
+#include "arm-protos.h"
+
namespace arm_mve {
/* Wrap T, which is derived from function_base, and indicate that the
@@ -40,17 +42,23 @@ public:
};
/* An incomplete function_base for functions that have an associated
- rtx_code for signed integers, unsigned integers and floating-point
- values for the non-predicated, non-suffixed intrinsic, and unspec
- codes, with separate codes for signed integers, unsigned integers
- and floating-point values. The class simply records information
- about the mapping for derived classes to use. */
+ rtx_code or an unspec for signed integers, unsigned integers and
+ floating-point values for the non-predicated, non-suffixed
+ intrinsics, and separate unspec codes for signed integers,
+ unsigned integers and floating-point values for the predicated
+ and/or suffixed intrinsics.  The class simply records information
+ about the mapping for derived classes to use, and provides a
+ generic expand_unspec () to avoid duplicating expansion code in
+ derived classes.  */
class unspec_based_mve_function_base : public function_base
{
public:
CONSTEXPR unspec_based_mve_function_base (rtx_code code_for_sint,
rtx_code code_for_uint,
rtx_code code_for_fp,
+ int unspec_for_sint,
+ int unspec_for_uint,
+ int unspec_for_fp,
int unspec_for_n_sint,
int unspec_for_n_uint,
int unspec_for_n_fp,
@@ -63,6 +71,9 @@ public:
: m_code_for_sint (code_for_sint),
m_code_for_uint (code_for_uint),
m_code_for_fp (code_for_fp),
+ m_unspec_for_sint (unspec_for_sint),
+ m_unspec_for_uint (unspec_for_uint),
+ m_unspec_for_fp (unspec_for_fp),
m_unspec_for_n_sint (unspec_for_n_sint),
m_unspec_for_n_uint (unspec_for_n_uint),
m_unspec_for_n_fp (unspec_for_n_fp),
@@ -83,6 +94,9 @@ public:
/* The unspec code associated with signed-integer, unsigned-integer
and floating-point operations respectively. It covers the cases
with the _n suffix, and/or the _m predicate. */
+ int m_unspec_for_sint;
+ int m_unspec_for_uint;
+ int m_unspec_for_fp;
int m_unspec_for_n_sint;
int m_unspec_for_n_uint;
int m_unspec_for_n_fp;
@@ -92,8 +106,101 @@ public:
int m_unspec_for_m_n_sint;
int m_unspec_for_m_n_uint;
int m_unspec_for_m_n_fp;
+
+ rtx expand_unspec (function_expander &e) const;
};
+/* Expand the unspecs.  This is common to all intrinsics using
+ unspec_based_mve_function_base.  If some combinations are not
+ supported for an intrinsic family, they should be handled by the
+ caller (and not crash here).  */
+rtx
+unspec_based_mve_function_base::expand_unspec (function_expander &e) const
+{
+ machine_mode mode = e.vector_mode (0);
+ insn_code code;
+
+ switch (e.pred)
+ {
+ case PRED_none:
+ switch (e.mode_suffix_id)
+ {
+ case MODE_none:
+ /* No predicate, no suffix. */
+ if (e.type_suffix (0).integer_p)
+ {
+ int unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_uint
+ : m_unspec_for_sint);
+ code = code_for_mve_q (unspec, unspec, mode);
+ }
+ else
+ code = code_for_mve_q_f (m_unspec_for_fp, mode);
+ break;
+
+ case MODE_n:
+ /* No predicate, _n suffix. */
+ if (e.type_suffix (0).integer_p)
+ {
+ int unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_n_uint
+ : m_unspec_for_n_sint);
+ code = code_for_mve_q_n (unspec, unspec, mode);
+ }
+ else
+ code = code_for_mve_q_n_f (m_unspec_for_n_fp, mode);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+ return e.use_exact_insn (code);
+
+ case PRED_m:
+ case PRED_x:
+ switch (e.mode_suffix_id)
+ {
+ case MODE_none:
+ /* No suffix, "m" or "x" predicate. */
+ if (e.type_suffix (0).integer_p)
+ {
+ int unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_m_uint
+ : m_unspec_for_m_sint);
+ code = code_for_mve_q_m (unspec, unspec, mode);
+ }
+ else
+ code = code_for_mve_q_m_f (m_unspec_for_m_fp, mode);
+ break;
+
+ case MODE_n:
+ /* _n suffix, "m" or "x" predicate. */
+ if (e.type_suffix (0).integer_p)
+ {
+ int unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_m_n_uint
+ : m_unspec_for_m_n_sint);
+ code = code_for_mve_q_m_n (unspec, unspec, mode);
+ }
+ else
+ code = code_for_mve_q_m_n_f (m_unspec_for_m_n_fp, mode);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ if (e.pred == PRED_m)
+ return e.use_cond_insn (code, 0);
+ else
+ return e.use_pred_x_insn (code);
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+}
+
/* Map the function directly to CODE (UNSPEC, M) where M is the vector
mode associated with type suffix 0, except when there is no
predicate and no _n suffix, in which case we use the appropriate
@@ -117,6 +224,9 @@ public:
: unspec_based_mve_function_base (code_for_sint,
code_for_uint,
code_for_fp,
+ -1,
+ -1,
+ -1,
unspec_for_n_sint,
unspec_for_n_uint,
unspec_for_n_fp,
@@ -137,97 +247,13 @@ public:
return e.map_to_rtx_codes (m_code_for_sint, m_code_for_uint,
m_code_for_fp);
- insn_code code;
- switch (e.pred)
- {
- case PRED_none:
- if (e.mode_suffix_id == MODE_n)
- /* No predicate, _n suffix. */
- {
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_n (m_unspec_for_n_uint, m_unspec_for_n_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_n (m_unspec_for_n_sint, m_unspec_for_n_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_n_f (m_unspec_for_n_fp, e.vector_mode (0));
-
- return e.use_exact_insn (code);
- }
- gcc_unreachable ();
- break;
-
- case PRED_m:
- switch (e.mode_suffix_id)
- {
- case MODE_none:
- /* No suffix, "m" predicate. */
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_f (m_unspec_for_m_fp, e.vector_mode (0));
- break;
-
- case MODE_n:
- /* _n suffix, "m" predicate. */
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_n_f (m_unspec_for_m_n_fp, e.vector_mode (0));
- break;
-
- default:
- gcc_unreachable ();
- }
- return e.use_cond_insn (code, 0);
-
- case PRED_x:
- switch (e.mode_suffix_id)
- {
- case MODE_none:
- /* No suffix, "x" predicate. */
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_f (m_unspec_for_m_fp, e.vector_mode (0));
- break;
-
- case MODE_n:
- /* _n suffix, "x" predicate. */
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_n_f (m_unspec_for_m_n_fp, e.vector_mode (0));
- break;
-
- default:
- gcc_unreachable ();
- }
- return e.use_pred_x_insn (code);
-
- default:
- gcc_unreachable ();
- }
-
- gcc_unreachable ();
+ return expand_unspec (e);
}
};
/* Map the function directly to CODE (UNSPEC, M) where M is the vector
mode associated with type suffix 0. */
-class unspec_mve_function_exact_insn : public function_base
+class unspec_mve_function_exact_insn : public unspec_based_mve_function_base
{
public:
CONSTEXPR unspec_mve_function_exact_insn (int unspec_for_sint,
@@ -242,143 +268,33 @@ public:
int unspec_for_m_n_sint,
int unspec_for_m_n_uint,
int unspec_for_m_n_fp)
- : m_unspec_for_sint (unspec_for_sint),
- m_unspec_for_uint (unspec_for_uint),
- m_unspec_for_fp (unspec_for_fp),
- m_unspec_for_n_sint (unspec_for_n_sint),
- m_unspec_for_n_uint (unspec_for_n_uint),
- m_unspec_for_n_fp (unspec_for_n_fp),
- m_unspec_for_m_sint (unspec_for_m_sint),
- m_unspec_for_m_uint (unspec_for_m_uint),
- m_unspec_for_m_fp (unspec_for_m_fp),
- m_unspec_for_m_n_sint (unspec_for_m_n_sint),
- m_unspec_for_m_n_uint (unspec_for_m_n_uint),
- m_unspec_for_m_n_fp (unspec_for_m_n_fp)
+ : unspec_based_mve_function_base (UNKNOWN,
+ UNKNOWN,
+ UNKNOWN,
+ unspec_for_sint,
+ unspec_for_uint,
+ unspec_for_fp,
+ unspec_for_n_sint,
+ unspec_for_n_uint,
+ unspec_for_n_fp,
+ unspec_for_m_sint,
+ unspec_for_m_uint,
+ unspec_for_m_fp,
+ unspec_for_m_n_sint,
+ unspec_for_m_n_uint,
+ unspec_for_m_n_fp)
{}
- /* The unspec code associated with signed-integer, unsigned-integer
- and floating-point operations respectively. It covers the cases
- with the _n suffix, and/or the _m predicate. */
- int m_unspec_for_sint;
- int m_unspec_for_uint;
- int m_unspec_for_fp;
- int m_unspec_for_n_sint;
- int m_unspec_for_n_uint;
- int m_unspec_for_n_fp;
- int m_unspec_for_m_sint;
- int m_unspec_for_m_uint;
- int m_unspec_for_m_fp;
- int m_unspec_for_m_n_sint;
- int m_unspec_for_m_n_uint;
- int m_unspec_for_m_n_fp;
-
rtx
expand (function_expander &e) const override
{
- insn_code code;
- switch (e.pred)
- {
- case PRED_none:
- switch (e.mode_suffix_id)
- {
- case MODE_none:
- /* No predicate, no suffix. */
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q (m_unspec_for_uint, m_unspec_for_uint, e.vector_mode (0));
- else
- code = code_for_mve_q (m_unspec_for_sint, m_unspec_for_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_f (m_unspec_for_fp, e.vector_mode (0));
- break;
-
- case MODE_n:
- /* No predicate, _n suffix. */
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_n (m_unspec_for_n_uint, m_unspec_for_n_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_n (m_unspec_for_n_sint, m_unspec_for_n_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_n_f (m_unspec_for_n_fp, e.vector_mode (0));
- break;
-
- default:
- gcc_unreachable ();
- }
- return e.use_exact_insn (code);
-
- case PRED_m:
- switch (e.mode_suffix_id)
- {
- case MODE_none:
- /* No suffix, "m" predicate. */
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_f (m_unspec_for_m_fp, e.vector_mode (0));
- break;
-
- case MODE_n:
- /* _n suffix, "m" predicate. */
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_n_f (m_unspec_for_m_n_fp, e.vector_mode (0));
- break;
-
- default:
- gcc_unreachable ();
- }
- return e.use_cond_insn (code, 0);
-
- case PRED_x:
- switch (e.mode_suffix_id)
- {
- case MODE_none:
- /* No suffix, "x" predicate. */
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_f (m_unspec_for_m_fp, e.vector_mode (0));
- break;
-
- case MODE_n:
- /* _n suffix, "x" predicate. */
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_n_f (m_unspec_for_m_n_fp, e.vector_mode (0));
- break;
-
- default:
- gcc_unreachable ();
- }
- return e.use_pred_x_insn (code);
-
- default:
- gcc_unreachable ();
- }
-
- gcc_unreachable ();
+ return expand_unspec (e);
}
};
/* Map the function directly to CODE (UNSPEC), when there is a
non-predicated version and one with the "_p" predicate. */
-class unspec_mve_function_exact_insn_pred_p : public function_base
+class unspec_mve_function_exact_insn_pred_p : public unspec_based_mve_function_base
{
public:
CONSTEXPR unspec_mve_function_exact_insn_pred_p (int unspec_for_sint,
@@ -387,19 +303,23 @@ public:
int unspec_for_p_sint,
int unspec_for_p_uint,
int unspec_for_p_fp)
- : m_unspec_for_sint (unspec_for_sint),
- m_unspec_for_uint (unspec_for_uint),
- m_unspec_for_fp (unspec_for_fp),
+ : unspec_based_mve_function_base (UNKNOWN, /* No RTX code. */
+ UNKNOWN,
+ UNKNOWN,
+ unspec_for_sint,
+ unspec_for_uint,
+ unspec_for_fp,
+ -1, -1, -1, /* No _n intrinsics. */
+ -1, -1, -1, /* No _m intrinsics. */
+ -1, -1, -1), /* No _m_n intrinsics. */
m_unspec_for_p_sint (unspec_for_p_sint),
m_unspec_for_p_uint (unspec_for_p_uint),
m_unspec_for_p_fp (unspec_for_p_fp)
{}
- /* The unspec code associated with signed-integer and unsigned-integer
- operations, with no predicate, or with "_p" predicate. */
- int m_unspec_for_sint;
- int m_unspec_for_uint;
- int m_unspec_for_fp;
+ /* The unspec code associated with signed-integer, unsigned-integer
+ and floating-point operations with the "_p" predicate.  */
int m_unspec_for_p_sint;
int m_unspec_for_p_uint;
int m_unspec_for_p_fp;
@@ -408,6 +328,7 @@ public:
expand (function_expander &e) const override
{
insn_code code;
+ int unspec;
if (m_unspec_for_sint == VADDLVQ_S
|| m_unspec_for_sint == VADDLVAQ_S
@@ -423,62 +344,49 @@ public:
switch (e.pred)
{
case PRED_none:
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_v4si (m_unspec_for_uint, m_unspec_for_uint);
- else
- code = code_for_mve_q_v4si (m_unspec_for_sint, m_unspec_for_sint);
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_uint
+ : m_unspec_for_sint);
+ code = code_for_mve_q_v4si (unspec, unspec);
return e.use_exact_insn (code);
case PRED_p:
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_p_v4si (m_unspec_for_p_uint, m_unspec_for_p_uint);
- else
- code = code_for_mve_q_p_v4si (m_unspec_for_p_sint, m_unspec_for_p_sint);
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_p_uint
+ : m_unspec_for_p_sint);
+ code = code_for_mve_q_p_v4si (unspec, unspec);
return e.use_exact_insn (code);
default:
gcc_unreachable ();
}
}
- else
- {
- switch (e.pred)
- {
- case PRED_none:
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q (m_unspec_for_uint, m_unspec_for_uint, e.vector_mode (0));
- else
- code = code_for_mve_q (m_unspec_for_sint, m_unspec_for_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_f (m_unspec_for_fp, e.vector_mode (0));
- return e.use_exact_insn (code);
-
- case PRED_p:
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_p (m_unspec_for_p_uint, m_unspec_for_p_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_p (m_unspec_for_p_sint, m_unspec_for_p_sint, e.vector_mode (0));
- else
- code = code_for_mve_q_p_f (m_unspec_for_p_fp, e.vector_mode (0));
-
- return e.use_exact_insn (code);
+ if (e.pred == PRED_p)
+ {
+ machine_mode mode = e.vector_mode (0);
- default:
- gcc_unreachable ();
+ if (e.type_suffix (0).integer_p)
+ {
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_p_uint
+ : m_unspec_for_p_sint);
+ code = code_for_mve_q_p (unspec, unspec, mode);
}
+ else
+ code = code_for_mve_q_p_f (m_unspec_for_p_fp, mode);
+
+ return e.use_exact_insn (code);
}
- gcc_unreachable ();
+ return expand_unspec (e);
}
};
/* Map the function directly to CODE (UNSPEC, M) for vshl-like
builtins. The difference with unspec_mve_function_exact_insn is
that this function handles MODE_r and the related unspecs.  */
-class unspec_mve_function_exact_insn_vshl : public function_base
+class unspec_mve_function_exact_insn_vshl : public unspec_based_mve_function_base
{
public:
CONSTEXPR unspec_mve_function_exact_insn_vshl (int unspec_for_sint,
@@ -493,31 +401,29 @@ public:
int unspec_for_m_r_uint,
int unspec_for_r_sint,
int unspec_for_r_uint)
- : m_unspec_for_sint (unspec_for_sint),
- m_unspec_for_uint (unspec_for_uint),
- m_unspec_for_n_sint (unspec_for_n_sint),
- m_unspec_for_n_uint (unspec_for_n_uint),
- m_unspec_for_m_sint (unspec_for_m_sint),
- m_unspec_for_m_uint (unspec_for_m_uint),
- m_unspec_for_m_n_sint (unspec_for_m_n_sint),
- m_unspec_for_m_n_uint (unspec_for_m_n_uint),
+ : unspec_based_mve_function_base (UNKNOWN,
+ UNKNOWN,
+ UNKNOWN,
+ unspec_for_sint,
+ unspec_for_uint,
+ -1,
+ unspec_for_n_sint,
+ unspec_for_n_uint,
+ -1,
+ unspec_for_m_sint,
+ unspec_for_m_uint,
+ -1,
+ unspec_for_m_n_sint,
+ unspec_for_m_n_uint,
+ -1),
m_unspec_for_m_r_sint (unspec_for_m_r_sint),
m_unspec_for_m_r_uint (unspec_for_m_r_uint),
m_unspec_for_r_sint (unspec_for_r_sint),
m_unspec_for_r_uint (unspec_for_r_uint)
{}
- /* The unspec code associated with signed-integer, unsigned-integer
- and floating-point operations respectively. It covers the cases
- with the _n suffix, and/or the _m predicate. */
- int m_unspec_for_sint;
- int m_unspec_for_uint;
- int m_unspec_for_n_sint;
- int m_unspec_for_n_uint;
- int m_unspec_for_m_sint;
- int m_unspec_for_m_uint;
- int m_unspec_for_m_n_sint;
- int m_unspec_for_m_n_uint;
+ /* The unspec code associated with signed-integer and unsigned-integer
+ operations using MODE_r, with or without the PRED_m predicate.  */
int m_unspec_for_m_r_sint;
int m_unspec_for_m_r_uint;
int m_unspec_for_r_sint;
@@ -527,101 +433,147 @@ public:
expand (function_expander &e) const override
{
insn_code code;
- switch (e.pred)
+ int unspec;
+
+ if (e.mode_suffix_id == MODE_r)
{
- case PRED_none:
- switch (e.mode_suffix_id)
+ machine_mode mode = e.vector_mode (0);
+ switch (e.pred)
{
- case MODE_none:
- /* No predicate, no suffix. */
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q (m_unspec_for_uint, m_unspec_for_uint, e.vector_mode (0));
- else
- code = code_for_mve_q (m_unspec_for_sint, m_unspec_for_sint, e.vector_mode (0));
- break;
-
- case MODE_n:
- /* No predicate, _n suffix. */
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_n (m_unspec_for_n_uint, m_unspec_for_n_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_n (m_unspec_for_n_sint, m_unspec_for_n_sint, e.vector_mode (0));
- break;
-
- case MODE_r:
+ case PRED_none:
/* No predicate, _r suffix. */
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_r (m_unspec_for_r_uint, m_unspec_for_r_uint, e.vector_mode (0));
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_r_uint
+ : m_unspec_for_r_sint);
+ code = code_for_mve_q_r (unspec, unspec, mode);
+ return e.use_exact_insn (code);
+
+ case PRED_m:
+ case PRED_x:
+ /* _r suffix, "m" or "x" predicate. */
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_m_r_uint
+ : m_unspec_for_m_r_sint);
+ code = code_for_mve_q_m_r (unspec, unspec, mode);
+
+ if (e.pred == PRED_m)
+ return e.use_cond_insn (code, 0);
else
- code = code_for_mve_q_r (m_unspec_for_r_sint, m_unspec_for_r_sint, e.vector_mode (0));
- break;
+ return e.use_pred_x_insn (code);
default:
gcc_unreachable ();
}
- return e.use_exact_insn (code);
+ }
- case PRED_m:
- switch (e.mode_suffix_id)
- {
- case MODE_none:
- /* No suffix, "m" predicate. */
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
- break;
+ return expand_unspec (e);
+ }
+};
- case MODE_n:
- /* _n suffix, "m" predicate. */
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, e.vector_mode (0));
- break;
+/* Map the function directly to CODE (M) for vbic-like builtins. The difference
+ with unspec_based_mve_function_exact_insn is that this function has vbic
+ hardcoded for the PRED_none, MODE_none version, rather than using an
+ RTX. */
+class unspec_based_mve_function_exact_insn_vbic : public unspec_based_mve_function_base
+{
+public:
+ CONSTEXPR unspec_based_mve_function_exact_insn_vbic (int unspec_for_n_sint,
+ int unspec_for_n_uint,
+ int unspec_for_m_sint,
+ int unspec_for_m_uint,
+ int unspec_for_m_fp,
+ int unspec_for_m_n_sint,
+ int unspec_for_m_n_uint)
+ : unspec_based_mve_function_base (UNKNOWN,
+ UNKNOWN,
+ UNKNOWN,
+ -1, -1, -1, /* No non-predicated, no mode intrinsics. */
+ unspec_for_n_sint,
+ unspec_for_n_uint,
+ -1,
+ unspec_for_m_sint,
+ unspec_for_m_uint,
+ unspec_for_m_fp,
+ unspec_for_m_n_sint,
+ unspec_for_m_n_uint,
+ -1)
+ {}
- case MODE_r:
- /* _r suffix, "m" predicate. */
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m_r (m_unspec_for_m_r_uint, m_unspec_for_m_r_uint, e.vector_mode (0));
+ rtx
+ expand (function_expander &e) const override
+ {
+ machine_mode mode = e.vector_mode (0);
+ insn_code code;
+
+ /* No suffix, no predicate: use the non-predicated vbicq insn directly.  */
+ if (e.pred == PRED_none
+ && e.mode_suffix_id == MODE_none)
+ {
+ if (e.type_suffix (0).integer_p)
+ if (e.type_suffix (0).unsigned_p)
+ code = code_for_mve_vbicq_u (mode);
+ else
+ code = code_for_mve_vbicq_s (mode);
else
- code = code_for_mve_q_m_r (m_unspec_for_m_r_sint, m_unspec_for_m_r_sint, e.vector_mode (0));
- break;
+ code = code_for_mve_vbicq_f (mode);
- default:
- gcc_unreachable ();
+ return e.use_exact_insn (code);
}
- return e.use_cond_insn (code, 0);
- case PRED_x:
- switch (e.mode_suffix_id)
- {
- case MODE_none:
- /* No suffix, "x" predicate. */
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
- break;
+ return expand_unspec (e);
+ }
+};
- case MODE_n:
- /* _n suffix, "x" predicate. */
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, e.vector_mode (0));
- break;
+/* Map the function directly to CODE (M) for vorn-like builtins. The difference
+ with unspec_based_mve_function_exact_insn is that this function has vorn
+ hardcoded for the PRED_none, MODE_none version, rather than using an
+ RTX. */
+class unspec_based_mve_function_exact_insn_vorn : public unspec_based_mve_function_base
+{
+public:
+ CONSTEXPR unspec_based_mve_function_exact_insn_vorn (int unspec_for_n_sint,
+ int unspec_for_n_uint,
+ int unspec_for_m_sint,
+ int unspec_for_m_uint,
+ int unspec_for_m_fp,
+ int unspec_for_m_n_sint,
+ int unspec_for_m_n_uint)
+ : unspec_based_mve_function_base (UNKNOWN,
+ UNKNOWN,
+ UNKNOWN,
+ -1, -1, -1, /* No non-predicated, no mode unspec intrinsics. */
+ unspec_for_n_sint,
+ unspec_for_n_uint,
+ -1,
+ unspec_for_m_sint,
+ unspec_for_m_uint,
+ unspec_for_m_fp,
+ unspec_for_m_n_sint,
+ unspec_for_m_n_uint,
+ -1)
+ {}
- default:
- gcc_unreachable ();
- }
- return e.use_pred_x_insn (code);
+ rtx
+ expand (function_expander &e) const override
+ {
+ machine_mode mode = e.vector_mode (0);
+ insn_code code;
- default:
- gcc_unreachable ();
+ /* No suffix, no predicate: use the non-predicated vornq insn directly.  */
+ if (e.pred == PRED_none
+ && e.mode_suffix_id == MODE_none)
+ {
+ if (e.type_suffix (0).integer_p)
+ if (e.type_suffix (0).unsigned_p)
+ code = code_for_mve_vornq_u (mode);
+ else
+ code = code_for_mve_vornq_s (mode);
+ else
+ code = code_for_mve_vornq_f (mode);
+ return e.use_exact_insn (code);
}
- gcc_unreachable ();
+ return expand_unspec (e);
}
};
@@ -641,9 +593,8 @@ public:
: unspec_based_mve_function_base (code_for_sint,
code_for_uint,
code_for_fp,
- -1,
- -1,
- -1,
+ -1, -1, -1, /* No non-predicated, no mode intrinsics. */
+ -1, -1, -1, /* No _n intrinsics. */
unspec_for_m_sint,
unspec_for_m_uint,
unspec_for_m_fp,
@@ -662,24 +613,30 @@ public:
/* No suffix, no predicate, use the right RTX code. */
if (e.pred == PRED_none)
{
+ rtx_code r_code;
+
switch (e.mode_suffix_id)
{
case MODE_none:
if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_vcmpq (m_code_for_uint, mode);
- else
- code = code_for_mve_vcmpq (m_code_for_sint, mode);
+ {
+ r_code = (e.type_suffix (0).unsigned_p
+ ? m_code_for_uint
+ : m_code_for_sint);
+ code = code_for_mve_vcmpq (r_code, mode);
+ }
else
code = code_for_mve_vcmpq_f (m_code_for_fp, mode);
break;
case MODE_n:
if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_vcmpq_n (m_code_for_uint, mode);
- else
- code = code_for_mve_vcmpq_n (m_code_for_sint, mode);
+ {
+ r_code = (e.type_suffix (0).unsigned_p
+ ? m_code_for_uint
+ : m_code_for_sint);
+ code = code_for_mve_vcmpq_n (r_code, mode);
+ }
else
code = code_for_mve_vcmpq_n_f (m_code_for_fp, mode);
break;
@@ -691,6 +648,8 @@ public:
}
else
{
+ int unspec;
+
switch (e.pred)
{
case PRED_m:
@@ -699,10 +658,12 @@ public:
case MODE_none:
/* No suffix, "m" predicate. */
if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_vcmpq_m (m_unspec_for_m_uint, m_unspec_for_m_uint, mode);
- else
- code = code_for_mve_vcmpq_m (m_unspec_for_m_sint, m_unspec_for_m_sint, mode);
+ {
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_m_uint
+ : m_unspec_for_m_sint);
+ code = code_for_mve_vcmpq_m (unspec, unspec, mode);
+ }
else
code = code_for_mve_vcmpq_m_f (m_unspec_for_m_fp, mode);
break;
@@ -710,10 +671,12 @@ public:
case MODE_n:
/* _n suffix, "m" predicate. */
if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_vcmpq_m_n (m_unspec_for_m_n_uint, m_unspec_for_m_n_uint, mode);
- else
- code = code_for_mve_vcmpq_m_n (m_unspec_for_m_n_sint, m_unspec_for_m_n_sint, mode);
+ {
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_m_n_uint
+ : m_unspec_for_m_n_sint);
+ code = code_for_mve_vcmpq_m_n (unspec, unspec, mode);
+ }
else
code = code_for_mve_vcmpq_m_n_f (m_unspec_for_m_n_fp, mode);
break;
@@ -738,7 +701,9 @@ public:
/* Map the function directly to CODE (UNSPEC, UNSPEC, UNSPEC, M) where
M is the vector mode associated with type suffix 0. USed for the
operations where there is a "rot90" or "rot270" suffix, depending
- on the UNSPEC. */
+ on the UNSPEC. We cannot use
+ unspec_based_mve_function_base::expand_unspec () because we call
+ code_for_mve_q with one more parameter. */
class unspec_mve_function_exact_insn_rot : public function_base
{
public:
@@ -769,7 +734,9 @@ public:
rtx
expand (function_expander &e) const override
{
+ machine_mode mode = e.vector_mode (0);
insn_code code;
+ int unspec;
switch (e.pred)
{
@@ -779,12 +746,14 @@ public:
case MODE_none:
/* No predicate, no suffix. */
if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q (m_unspec_for_uint, m_unspec_for_uint, m_unspec_for_uint, e.vector_mode (0));
- else
- code = code_for_mve_q (m_unspec_for_sint, m_unspec_for_sint, m_unspec_for_sint, e.vector_mode (0));
+ {
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_uint
+ : m_unspec_for_sint);
+ code = code_for_mve_q (unspec, unspec, unspec, mode);
+ }
else
- code = code_for_mve_q_f (m_unspec_for_fp, m_unspec_for_fp, e.vector_mode (0));
+ code = code_for_mve_q_f (m_unspec_for_fp, m_unspec_for_fp, mode);
break;
default:
@@ -793,42 +762,30 @@ public:
return e.use_exact_insn (code);
case PRED_m:
+ case PRED_x:
switch (e.mode_suffix_id)
{
case MODE_none:
- /* No suffix, "m" predicate. */
+ /* No suffix, "m" or "x" predicate. */
if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
+ {
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_m_uint
+ : m_unspec_for_m_sint);
+ code = code_for_mve_q_m (unspec, unspec, unspec, mode);
+ }
else
- code = code_for_mve_q_m_f (m_unspec_for_m_fp, m_unspec_for_m_fp, e.vector_mode (0));
- break;
-
- default:
- gcc_unreachable ();
- }
- return e.use_cond_insn (code, 0);
+ code = code_for_mve_q_m_f (m_unspec_for_m_fp, m_unspec_for_m_fp, mode);
- case PRED_x:
- switch (e.mode_suffix_id)
- {
- case MODE_none:
- /* No suffix, "x" predicate. */
- if (e.type_suffix (0).integer_p)
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_m (m_unspec_for_m_uint, m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_m (m_unspec_for_m_sint, m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
+ if (e.pred == PRED_m)
+ return e.use_cond_insn (code, 0);
else
- code = code_for_mve_q_m_f (m_unspec_for_m_fp, m_unspec_for_m_fp, e.vector_mode (0));
+ return e.use_pred_x_insn (code);
break;
default:
gcc_unreachable ();
}
- return e.use_pred_x_insn (code);
default:
gcc_unreachable ();
@@ -866,7 +823,9 @@ public:
rtx
expand (function_expander &e) const override
{
+ machine_mode mode = e.vector_mode (0);
insn_code code;
+ int unspec;
if (! e.type_suffix (0).integer_p)
gcc_unreachable ();
@@ -878,30 +837,25 @@ public:
{
case PRED_none:
/* No predicate, no suffix. */
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_int (m_unspec_for_uint, m_unspec_for_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_int (m_unspec_for_sint, m_unspec_for_sint, e.vector_mode (0));
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_uint
+ : m_unspec_for_sint);
+ code = code_for_mve_q_int (unspec, unspec, mode);
return e.use_exact_insn (code);
case PRED_m:
- /* No suffix, "m" predicate. */
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_int_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
- else
- code = code_for_mve_q_int_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
-
- return e.use_cond_insn (code, 0);
-
case PRED_x:
- /* No suffix, "x" predicate. */
- if (e.type_suffix (0).unsigned_p)
- code = code_for_mve_q_int_m (m_unspec_for_m_uint, m_unspec_for_m_uint, e.vector_mode (0));
+ /* No suffix, "m" or "x" predicate. */
+ unspec = (e.type_suffix (0).unsigned_p
+ ? m_unspec_for_m_uint
+ : m_unspec_for_m_sint);
+ code = code_for_mve_q_int_m (unspec, unspec, mode);
+
+ if (e.pred == PRED_m)
+ return e.use_cond_insn (code, 0);
else
- code = code_for_mve_q_int_m (m_unspec_for_m_sint, m_unspec_for_m_sint, e.vector_mode (0));
-
- return e.use_pred_x_insn (code);
+ return e.use_pred_x_insn (code);
default:
gcc_unreachable ();
@@ -933,6 +887,7 @@ public:
rtx
expand (function_expander &e) const override
{
+ machine_mode mode = e.vector_mode (0);
insn_code code;
if (e.mode_suffix_id != MODE_none)
@@ -945,18 +900,18 @@ public:
{
case PRED_none:
/* No predicate, no suffix. */
- code = code_for_mve_q_poly (m_unspec_for_poly, m_unspec_for_poly, e.vector_mode (0));
+ code = code_for_mve_q_poly (m_unspec_for_poly, m_unspec_for_poly, mode);
return e.use_exact_insn (code);
case PRED_m:
- /* No suffix, "m" predicate. */
- code = code_for_mve_q_poly_m (m_unspec_for_m_poly, m_unspec_for_m_poly, e.vector_mode (0));
- return e.use_cond_insn (code, 0);
-
case PRED_x:
- /* No suffix, "x" predicate. */
- code = code_for_mve_q_poly_m (m_unspec_for_m_poly, m_unspec_for_m_poly, e.vector_mode (0));
- return e.use_pred_x_insn (code);
+ /* No suffix, "m" or "x" predicate. */
+ code = code_for_mve_q_poly_m (m_unspec_for_m_poly, m_unspec_for_m_poly, mode);
+
+ if (e.pred == PRED_m)
+ return e.use_cond_insn (code, 0);
+ else
+ return e.use_pred_x_insn (code);
default:
gcc_unreachable ();
@@ -1003,19 +958,6 @@ public:
memory_vector_mode (const function_instance &fi) const override
{
machine_mode mode = fi.vector_mode (0);
- /* Vectors of floating-point are managed in memory as vectors of
- integers. */
- switch (mode)
- {
- case E_V4SFmode:
- mode = E_V4SImode;
- break;
- case E_V8HFmode:
- mode = E_V8HImode;
- break;
- default:
- break;
- }
if (m_vectors_per_tuple != 1)
mode = targetm.array_mode (mode, m_vectors_per_tuple).require ();
@@ -1024,6 +966,107 @@ public:
}
};
+/* A function_base that loads elements from memory and extends them
+ to a wider element. The memory element type is a fixed part of
+ the function base name. */
+class load_extending : public function_base
+{
+public:
+ CONSTEXPR load_extending (type_suffix_index signed_memory_type,
+ type_suffix_index unsigned_memory_type,
+ type_suffix_index float_memory_type)
+ : m_signed_memory_type (signed_memory_type),
+ m_unsigned_memory_type (unsigned_memory_type),
+ m_float_memory_type (float_memory_type)
+ {}
+ CONSTEXPR load_extending (type_suffix_index signed_memory_type,
+ type_suffix_index unsigned_memory_type)
+ : m_signed_memory_type (signed_memory_type),
+ m_unsigned_memory_type (unsigned_memory_type),
+ m_float_memory_type (NUM_TYPE_SUFFIXES)
+ {}
+
+ unsigned int call_properties (const function_instance &) const override
+ {
+ return CP_READ_MEMORY;
+ }
+
+ tree memory_scalar_type (const function_instance &fi) const override
+ {
+ type_suffix_index memory_type_suffix
+ = (fi.type_suffix (0).integer_p
+ ? (fi.type_suffix (0).unsigned_p
+ ? m_unsigned_memory_type
+ : m_signed_memory_type)
+ : m_float_memory_type);
+ return scalar_types[type_suffixes[memory_type_suffix].vector_type];
+ }
+
+ machine_mode memory_vector_mode (const function_instance &fi) const override
+ {
+ type_suffix_index memory_type_suffix
+ = (fi.type_suffix (0).integer_p
+ ? (fi.type_suffix (0).unsigned_p
+ ? m_unsigned_memory_type
+ : m_signed_memory_type)
+ : m_float_memory_type);
+ machine_mode mem_mode = type_suffixes[memory_type_suffix].vector_mode;
+ machine_mode reg_mode = fi.vector_mode (0);
+
+ return arm_mve_data_mode (GET_MODE_INNER (mem_mode),
+ GET_MODE_NUNITS (reg_mode)).require ();
+ }
+
+ /* The type of the memory elements. This is part of the function base
+ name rather than a true type suffix. */
+ type_suffix_index m_signed_memory_type;
+ type_suffix_index m_unsigned_memory_type;
+ type_suffix_index m_float_memory_type;
+};
+
+/* A function_base that truncates vector elements and stores them to memory.
+ The memory element width is a fixed part of the function base name. */
+class store_truncating : public function_base
+{
+public:
+ CONSTEXPR store_truncating (scalar_mode to_int_mode,
+ opt_scalar_mode to_float_mode)
+ : m_to_int_mode (to_int_mode), m_to_float_mode (to_float_mode)
+ {}
+
+ unsigned int call_properties (const function_instance &) const override
+ {
+ return CP_WRITE_MEMORY;
+ }
+
+ tree memory_scalar_type (const function_instance &fi) const override
+ {
+ /* In truncating stores, the signedness of the memory element is defined
+ to be the same as the signedness of the vector element. The signedness
+ doesn't make any difference to the behavior of the function. */
+ type_class_index tclass = fi.type_suffix (0).tclass;
+ unsigned int element_bits
+ = GET_MODE_BITSIZE (fi.type_suffix (0).integer_p
+ ? m_to_int_mode
+ : m_to_float_mode.require ());
+ type_suffix_index suffix = find_type_suffix (tclass, element_bits);
+ return scalar_types[type_suffixes[suffix].vector_type];
+ }
+
+ machine_mode memory_vector_mode (const function_instance &fi) const override
+ {
+ poly_uint64 nunits = GET_MODE_NUNITS (fi.vector_mode (0));
+ scalar_mode mode = (fi.type_suffix (0).integer_p
+ ? m_to_int_mode
+ : m_to_float_mode.require ());
+ return arm_mve_data_mode (mode, nunits).require ();
+ }
+
+ /* The mode of a single memory element. */
+ scalar_mode m_to_int_mode;
+ opt_scalar_mode m_to_float_mode;
+};
+
} /* end namespace arm_mve */
/* Declare the global function base NAME, creating it from an instance
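
Note: load_extending and store_truncating above describe the
vldrb/vldrh/vldrw and vstrb/vstrh/vstrw forms whose memory element width
is fixed by the base name and may be narrower than the register element;
memory_vector_mode rebuilds the memory mode from that element width and
the register lane count.  A minimal sketch of the corresponding
intrinsics, assuming the standard arm_mve.h API (the function is
illustrative only):

    #include <arm_mve.h>

    void triple_u8 (uint8_t *p)
    {
      uint16x8_t v = vldrbq_u16 (p);  /* load 8 bytes, zero-extend to u16 lanes  */
      v = vmulq_n_u16 (v, 3);         /* arithmetic in the wider element type  */
      vstrbq_u16 (p, v);              /* truncate back to 8-bit memory  */
    }
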
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.cc b/gcc/config/arm/arm-mve-builtins-shapes.cc
index ba20c6a..12e6212 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.cc
+++ b/gcc/config/arm/arm-mve-builtins-shapes.cc
@@ -320,6 +320,45 @@ build_16_32 (function_builder &b, const char *signature,
}
}
+/* TYPE is the largest type suffix associated with the arguments of R, but the
+ result is twice as wide.  Return the type suffix of EXPECTED_TCLASS that is
+ twice as wide as TYPE if it exists, otherwise report an appropriate error
+ and return NUM_TYPE_SUFFIXES.  */
+static type_suffix_index
+long_type_suffix (function_resolver &r,
+ type_suffix_index type,
+ type_class_index expected_tclass)
+{
+ unsigned int element_bits = type_suffixes[type].element_bits;
+ if (expected_tclass == function_resolver::SAME_TYPE_CLASS)
+ expected_tclass = type_suffixes[type].tclass;
+
+ if (type_suffixes[type].integer_p && element_bits < 64)
+ return find_type_suffix (expected_tclass, element_bits * 2);
+
+ r.report_no_such_form (type);
+ return NUM_TYPE_SUFFIXES;
+}
+
+/* Return the type suffix of EXPECTED_TCLASS that is half as wide as TYPE if
+ it exists, otherwise report an appropriate error and return
+ NUM_TYPE_SUFFIXES.  */
+static type_suffix_index
+half_type_suffix (function_resolver &r,
+ type_suffix_index type,
+ type_class_index expected_tclass)
+{
+ unsigned int element_bits = type_suffixes[type].element_bits;
+ if (expected_tclass == function_resolver::SAME_TYPE_CLASS)
+ expected_tclass = type_suffixes[type].tclass;
+
+ if (type_suffixes[type].integer_p && element_bits > 8)
+ return find_type_suffix (expected_tclass, element_bits / 2);
+
+ r.report_no_such_form (type);
+ return NUM_TYPE_SUFFIXES;
+}
+
/* Declare the function shape NAME, pointing it to an instance
of class <NAME>_def. */
#define SHAPE(NAME) \
@@ -330,7 +369,8 @@ build_16_32 (function_builder &b, const char *signature,
struct nonoverloaded_base : public function_shape
{
bool
- explicit_type_suffix_p (unsigned int, enum predication_index, enum mode_suffix_index) const override
+ explicit_type_suffix_p (unsigned int, enum predication_index,
+ enum mode_suffix_index, type_suffix_info) const override
{
return true;
}
@@ -360,7 +400,8 @@ template<unsigned int EXPLICIT_MASK>
struct overloaded_base : public function_shape
{
bool
- explicit_type_suffix_p (unsigned int i, enum predication_index, enum mode_suffix_index) const override
+ explicit_type_suffix_p (unsigned int i, enum predication_index,
+ enum mode_suffix_index, type_suffix_info) const override
{
return (EXPLICIT_MASK >> i) & 1;
}
@@ -475,18 +516,23 @@ struct binary_acca_int32_def : public overloaded_base<0>
{
unsigned int i, nargs;
type_suffix_index type;
+ const char *first_type_name;
+
if (!r.check_gp_argument (3, i, nargs)
|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
return error_mark_node;
+ first_type_name = (type_suffixes[type].unsigned_p
+ ? "uint32_t"
+ : "int32_t");
+ if (!r.require_scalar_type (0, first_type_name))
+ return error_mark_node;
+
unsigned int last_arg = i + 1;
for (i = 1; i < last_arg; i++)
if (!r.require_matching_vector_type (i, type))
return error_mark_node;
- if (!r.require_integer_immediate (0))
- return error_mark_node;
-
return r.resolve_to (r.mode_suffix_id, type);
}
};
@@ -512,18 +558,24 @@ struct binary_acca_int64_def : public overloaded_base<0>
{
unsigned int i, nargs;
type_suffix_index type;
+ const char *first_type_name;
+
if (!r.check_gp_argument (3, i, nargs)
|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
return error_mark_node;
+
+ first_type_name = (type_suffixes[type].unsigned_p
+ ? "uint64_t"
+ : "int64_t");
+ if (!r.require_scalar_type (0, first_type_name))
+ return error_mark_node;
+
unsigned int last_arg = i + 1;
for (i = 1; i < last_arg; i++)
if (!r.require_matching_vector_type (i, type))
return error_mark_node;
- if (!r.require_integer_immediate (0))
- return error_mark_node;
-
return r.resolve_to (r.mode_suffix_id, type);
}
};
@@ -611,7 +663,7 @@ struct binary_lshift_unsigned_def : public overloaded_base<0>
bool preserve_user_namespace) const override
{
b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
- build_all (b, "vu0,vs0,ss32", group, MODE_n, preserve_user_namespace);
+ build_all (b, "vu0,vs0,su64", group, MODE_n, preserve_user_namespace);
}
tree
@@ -620,6 +672,7 @@ struct binary_lshift_unsigned_def : public overloaded_base<0>
unsigned int i, nargs;
type_suffix_index type;
if (!r.check_gp_argument (2, i, nargs)
+ || !r.require_integer_immediate (i)
|| (type = r.infer_vector_type (i-1)) == NUM_TYPE_SUFFIXES)
return error_mark_node;
@@ -634,10 +687,6 @@ struct binary_lshift_unsigned_def : public overloaded_base<0>
return error_mark_node;
}
- for (; i < nargs; ++i)
- if (!r.require_integer_immediate (i))
- return error_mark_node;
-
return r.resolve_to (r.mode_suffix_id, type);
}
@@ -769,16 +818,13 @@ struct binary_move_narrow_def : public overloaded_base<0>
resolve (function_resolver &r) const override
{
unsigned int i, nargs;
- type_suffix_index type;
+ type_suffix_index type, narrow_suffix;
if (!r.check_gp_argument (2, i, nargs)
- || (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
+ || (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES
+ || ((narrow_suffix = half_type_suffix (r, type, r.SAME_TYPE_CLASS))
+ == NUM_TYPE_SUFFIXES))
return error_mark_node;
- type_suffix_index narrow_suffix
- = find_type_suffix (type_suffixes[type].tclass,
- type_suffixes[type].element_bits / 2);
-
-
if (!r.require_matching_vector_type (0, narrow_suffix))
return error_mark_node;
@@ -806,15 +852,13 @@ struct binary_move_narrow_unsigned_def : public overloaded_base<0>
resolve (function_resolver &r) const override
{
unsigned int i, nargs;
- type_suffix_index type;
+ type_suffix_index type, narrow_suffix;
if (!r.check_gp_argument (2, i, nargs)
- || (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
+ || (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES
+ || ((narrow_suffix = half_type_suffix (r, type, TYPE_unsigned))
+ == NUM_TYPE_SUFFIXES))
return error_mark_node;
- type_suffix_index narrow_suffix
- = find_type_suffix (TYPE_unsigned,
- type_suffixes[type].element_bits / 2);
-
if (!r.require_matching_vector_type (0, narrow_suffix))
return error_mark_node;
@@ -865,7 +909,12 @@ SHAPE (binary_opt_n)
int16x8_t [__arm_]vorrq_m[_s16](int16x8_t inactive, int16x8_t a, int16x8_t b, mve_pred16_t p)
int16x8_t [__arm_]vorrq_x[_s16](int16x8_t a, int16x8_t b, mve_pred16_t p)
int16x8_t [__arm_]vorrq[_n_s16](int16x8_t a, const int16_t imm)
- int16x8_t [__arm_]vorrq_m_n[_s16](int16x8_t a, const int16_t imm, mve_pred16_t p) */
+ int16x8_t [__arm_]vorrq_m_n[_s16](int16x8_t a, const int16_t imm, mve_pred16_t p)
+
+ No "_n" forms for floating-point, nor 8-bit integers:
+ float16x8_t [__arm_]vorrq[_f16](float16x8_t a, float16x8_t b)
+ float16x8_t [__arm_]vorrq_m[_f16](float16x8_t inactive, float16x8_t a, float16x8_t b, mve_pred16_t p)
+ float16x8_t [__arm_]vorrq_x[_f16](float16x8_t a, float16x8_t b, mve_pred16_t p) */
struct binary_orrq_def : public overloaded_base<0>
{
bool
@@ -1090,23 +1139,21 @@ struct binary_rshift_narrow_def : public overloaded_base<0>
bool preserve_user_namespace) const override
{
b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
- build_all (b, "vh0,vh0,v0,ss32", group, MODE_n, preserve_user_namespace);
+ build_all (b, "vh0,vh0,v0,su64", group, MODE_n, preserve_user_namespace);
}
tree
resolve (function_resolver &r) const override
{
unsigned int i, nargs;
- type_suffix_index type;
+ type_suffix_index type, narrow_suffix;
if (!r.check_gp_argument (3, i, nargs)
|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES
+ || ((narrow_suffix = half_type_suffix (r, type, r.SAME_TYPE_CLASS))
+ == NUM_TYPE_SUFFIXES)
|| !r.require_integer_immediate (i))
return error_mark_node;
- type_suffix_index narrow_suffix
- = find_type_suffix (type_suffixes[type].tclass,
- type_suffixes[type].element_bits / 2);
-
if (!r.require_matching_vector_type (0, narrow_suffix))
return error_mark_node;
@@ -1137,23 +1184,21 @@ struct binary_rshift_narrow_unsigned_def : public overloaded_base<0>
bool preserve_user_namespace) const override
{
b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
- build_all (b, "vhu0,vhu0,v0,ss32", group, MODE_n, preserve_user_namespace);
+ build_all (b, "vhu0,vhu0,v0,su64", group, MODE_n, preserve_user_namespace);
}
tree
resolve (function_resolver &r) const override
{
unsigned int i, nargs;
- type_suffix_index type;
+ type_suffix_index type, narrow_suffix;
if (!r.check_gp_argument (3, i, nargs)
|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES
+ || ((narrow_suffix = half_type_suffix (r, type, TYPE_unsigned))
+ == NUM_TYPE_SUFFIXES)
|| !r.require_integer_immediate (i))
return error_mark_node;
- type_suffix_index narrow_suffix
- = find_type_suffix (TYPE_unsigned,
- type_suffixes[type].element_bits / 2);
-
if (!r.require_matching_vector_type (0, narrow_suffix))
return error_mark_node;
@@ -1190,15 +1235,13 @@ struct binary_widen_def : public overloaded_base<0>
resolve (function_resolver &r) const override
{
unsigned int i, nargs;
- type_suffix_index type;
+ type_suffix_index type, wide_suffix;
if (!r.check_gp_argument (2, i, nargs)
- || (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES)
+ || (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES
+ || ((wide_suffix = long_type_suffix (r, type, r.SAME_TYPE_CLASS))
+ == NUM_TYPE_SUFFIXES))
return error_mark_node;
- type_suffix_index wide_suffix
- = find_type_suffix (type_suffixes[type].tclass,
- type_suffixes[type].element_bits * 2);
-
if (!r.require_matching_vector_type (i, type))
return error_mark_node;
@@ -1283,17 +1326,15 @@ struct binary_widen_n_def : public overloaded_base<0>
resolve (function_resolver &r) const override
{
unsigned int i, nargs;
- type_suffix_index type;
+ type_suffix_index type, wide_suffix;
tree res;
if (!r.check_gp_argument (2, i, nargs)
|| (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES
+ || ((wide_suffix = long_type_suffix (r, type, r.SAME_TYPE_CLASS))
+ == NUM_TYPE_SUFFIXES)
|| !r.require_integer_immediate (i))
return error_mark_node;
- type_suffix_index wide_suffix
- = find_type_suffix (type_suffixes[type].tclass,
- type_suffixes[type].element_bits * 2);
-
/* Check the inactive argument has the wide type. */
if (((r.pred == PRED_m) && (r.infer_vector_type (0) == wide_suffix))
|| r.pred == PRED_none
@@ -1337,15 +1378,13 @@ struct binary_widen_opt_n_def : public overloaded_base<0>
resolve (function_resolver &r) const override
{
unsigned int i, nargs;
- type_suffix_index type;
+ type_suffix_index type, wide_suffix;
if (!r.check_gp_argument (2, i, nargs)
- || (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES)
+ || (type = r.infer_vector_type (i - 1)) == NUM_TYPE_SUFFIXES
+ || ((wide_suffix = long_type_suffix (r, type, r.SAME_TYPE_CLASS))
+ == NUM_TYPE_SUFFIXES))
return error_mark_node;
- type_suffix_index wide_suffix
- = find_type_suffix (type_suffixes[type].tclass,
- type_suffixes[type].element_bits * 2);
-
/* Skip last argument, may be scalar, will be checked below by
finish_opt_n_resolution. */
unsigned int last_arg = i--;
@@ -1403,12 +1442,6 @@ struct create_def : public nonoverloaded_base
{
build_all (b, "v0,su64,su64", group, MODE_none, preserve_user_namespace);
}
-
- tree
- resolve (function_resolver &r) const override
- {
- return r.resolve_uniform (0, 2);
- }
};
SHAPE (create)
@@ -1428,7 +1461,9 @@ struct inherent_def : public nonoverloaded_base
};
SHAPE (inherent)
-/* sv<t0>_t svfoo[_t0](const <t0>_t *)
+/* <T0>_t vfoo[_t0](const <s0>_t *)
+
+ where <s0> is the scalar name of <T0>.
Example: vld1q.
int8x16_t [__arm_]vld1q[_s8](int8_t const *base)
@@ -1460,6 +1495,24 @@ struct load_def : public overloaded_base<0>
};
SHAPE (load)
+/* <T0>_t foo_t0 (const <X>_t *)
+
+ where <X> is determined by the function base name.
+
+ Example: vldrq.
+ int32x4_t [__arm_]vldrwq_s32 (int32_t const *base)
+ uint32x4_t [__arm_]vldrhq_z_u32 (uint16_t const *base, mve_pred16_t p) */
+struct load_ext_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group,
+ bool preserve_user_namespace) const override
+ {
+ build_all (b, "t0,al", group, MODE_none, preserve_user_namespace);
+ }
+};
+SHAPE (load_ext)
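
(Editorial aside, not part of the patch: a minimal caller-side sketch of the
intrinsics this shape describes, using the vldrwq/vldrhq prototypes quoted
above.  Assumes an MVE-enabled compile, e.g. -march=armv8.1-m.main+mve.)

    #include <arm_mve.h>

    /* Non-overloaded load: the element width is encoded in the function
       name, as the load_ext comment explains.  */
    int32x4_t load_words (const int32_t *base)
    {
      return vldrwq_s32 (base);
    }

    /* Zero-predicated widening load, matching the second example above.  */
    uint32x4_t load_halves_z (const uint16_t *base, mve_pred16_t p)
    {
      return vldrhq_z_u32 (base, p);
    }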
+
/* <T0>_t vfoo[_t0](<T0>_t)
<T0>_t vfoo_n_t0(<sT0>_t)
@@ -1509,14 +1562,18 @@ struct mvn_def : public overloaded_base<0>
};
SHAPE (mvn)
-/* void vfoo[_t0](<X>_t *, v<t0>[xN]_t)
+/* void vfoo[_t0](<X>_t *, <T0>[xN]_t)
where <X> might be tied to <t0> (for non-truncating stores) or might
depend on the function base name (for truncating stores).
Example: vst1q.
void [__arm_]vst1q[_s8](int8_t *base, int8x16_t value)
- void [__arm_]vst1q_p[_s8](int8_t *base, int8x16_t value, mve_pred16_t p) */
+ void [__arm_]vst1q_p[_s8](int8_t *base, int8x16_t value, mve_pred16_t p)
+
+ Example: vstrb.
+ void [__arm_]vstrbq[_s16](int8_t *base, int16x8_t value)
+ void [__arm_]vstrbq_p[_s16](int8_t *base, int16x8_t value, mve_pred16_t p) */
struct store_def : public overloaded_base<0>
{
void
@@ -1587,7 +1644,7 @@ struct ternary_lshift_def : public overloaded_base<0>
bool preserve_user_namespace) const override
{
b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
- build_all (b, "v0,v0,v0,ss32", group, MODE_n, preserve_user_namespace);
+ build_all (b, "v0,v0,v0,su64", group, MODE_n, preserve_user_namespace);
}
tree
@@ -1682,7 +1739,7 @@ struct ternary_rshift_def : public overloaded_base<0>
bool preserve_user_namespace) const override
{
b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
- build_all (b, "v0,v0,v0,ss32", group, MODE_n, preserve_user_namespace);
+ build_all (b, "v0,v0,v0,su64", group, MODE_n, preserve_user_namespace);
}
tree
@@ -1837,11 +1894,18 @@ struct unary_int32_acc_def : public overloaded_base<0>
{
unsigned int i, nargs;
type_suffix_index type;
+ const char *first_type_name;
+
if (!r.check_gp_argument (2, i, nargs)
- || !r.require_integer_immediate (0)
|| (type = r.infer_vector_type (1)) == NUM_TYPE_SUFFIXES)
return error_mark_node;
+ first_type_name = (type_suffixes[type].unsigned_p
+ ? "uint32_t"
+ : "int32_t");
+ if (!r.require_scalar_type (0, first_type_name))
+ return error_mark_node;
+
return r.resolve_to (r.mode_suffix_id, type);
}
};
@@ -1857,7 +1921,7 @@ struct unary_n_def : public overloaded_base<0>
{
bool
explicit_type_suffix_p (unsigned int, enum predication_index pred,
- enum mode_suffix_index) const override
+ enum mode_suffix_index, type_suffix_info) const override
{
return pred != PRED_m;
}
@@ -1923,16 +1987,14 @@ struct unary_widen_def : public overloaded_base<0>
resolve (function_resolver &r) const override
{
unsigned int i, nargs;
- type_suffix_index type;
+ type_suffix_index type, wide_suffix;
tree res;
if (!r.check_gp_argument (1, i, nargs)
- || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
+ || (type = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES
+ || ((wide_suffix = long_type_suffix (r, type, r.SAME_TYPE_CLASS))
+ == NUM_TYPE_SUFFIXES))
return error_mark_node;
- type_suffix_index wide_suffix
- = find_type_suffix (type_suffixes[type].tclass,
- type_suffixes[type].element_bits * 2);
-
/* Check the inactive argument has the wide type. */
if ((r.pred == PRED_m)
&& (r.infer_vector_type (0) != wide_suffix))
@@ -1980,6 +2042,425 @@ struct unary_widen_acc_def : public overloaded_base<0>
};
SHAPE (unary_widen_acc)
+/* <T0>_t vfoo[_t0](T0, T0, uint32_t*)
+
+ Example: vadcq.
+ int32x4_t [__arm_]vadcq[_s32](int32x4_t a, int32x4_t b, unsigned *carry)
+ int32x4_t [__arm_]vadcq_m[_s32](int32x4_t inactive, int32x4_t a, int32x4_t b, unsigned *carry, mve_pred16_t p) */
+struct vadc_vsbc_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group,
+ bool preserve_user_namespace) const override
+ {
+ b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+ build_all (b, "v0,v0,v0,as", group, MODE_none, preserve_user_namespace);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (3, i, nargs)
+ || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ if (!r.require_matching_vector_type (1, type))
+ return error_mark_node;
+
+ /* Check that last arg is a pointer. */
+ if (!POINTER_TYPE_P (r.get_argument_type (i)))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+};
+SHAPE (vadc_vsbc)
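
(Illustration only: how the vadc_vsbc shape looks from the caller's side,
based on the vadcq prototype quoted above.)

    #include <arm_mve.h>

    /* Add with carry: the carry bit is read from and written back through
       the pointer argument.  */
    int32x4_t add_with_carry (int32x4_t a, int32x4_t b, unsigned *carry)
    {
      return vadcq (a, b, carry);   /* Overload resolves to vadcq_s32.  */
    }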
+
+/* mve_pred16_t foo_t0(uint32_t)
+
+ Example: vctp16q.
+ mve_pred16_t [__arm_]vctp16q(uint32_t a)
+ mve_pred16_t [__arm_]vctp16q_m(uint32_t a, mve_pred16_t p) */
+struct vctp_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group,
+ bool preserve_user_namespace) const override
+ {
+ build_all (b, "p,su32", group, MODE_none, preserve_user_namespace);
+ }
+};
+SHAPE (vctp)
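
(Illustration only, from the vctp16q example above.)

    #include <arm_mve.h>

    /* Build a tail predicate enabling the first N of the 8 16-bit lanes.  */
    mve_pred16_t tail_predicate (uint32_t n)
    {
      return vctp16q (n);
    }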
+
+/* <T0>_t foo_t0[_t1](<T1>_t)
+ <T0>_t foo_t0_n[_t1](<T1>_t, const int)
+
+ Example: vcvtq.
+ float32x4_t [__arm_]vcvtq[_f32_s32](int32x4_t a)
+ float32x4_t [__arm_]vcvtq_m[_f32_s32](float32x4_t inactive, int32x4_t a, mve_pred16_t p)
+ float32x4_t [__arm_]vcvtq_x[_f32_s32](int32x4_t a, mve_pred16_t p)
+ float32x4_t [__arm_]vcvtq_n[_f32_s32](int32x4_t a, const int imm6)
+ float32x4_t [__arm_]vcvtq_m_n[_f32_s32](float32x4_t inactive, int32x4_t a, const int imm6, mve_pred16_t p)
+ float32x4_t [__arm_]vcvtq_x_n[_f32_s32](int32x4_t a, const int imm6, mve_pred16_t p)
+ int32x4_t [__arm_]vcvtq_s32_f32(float32x4_t a)
+ int32x4_t [__arm_]vcvtq_m[_s32_f32](int32x4_t inactive, float32x4_t a, mve_pred16_t p)
+ int32x4_t [__arm_]vcvtq_x_s32_f32(float32x4_t a, mve_pred16_t p)
+ int32x4_t [__arm_]vcvtq_n_s32_f32(float32x4_t a, const int imm6)
+ int32x4_t [__arm_]vcvtq_m_n[_s32_f32](int32x4_t inactive, float32x4_t a, const int imm6, mve_pred16_t p)
+ int32x4_t [__arm_]vcvtq_x_n_s32_f32(float32x4_t a, const int imm6, mve_pred16_t p) */
+struct vcvt_def : public overloaded_base<0>
+{
+ bool
+ explicit_type_suffix_p (unsigned int i, enum predication_index pred,
+ enum mode_suffix_index,
+ type_suffix_info type_info) const override
+ {
+ if (pred != PRED_m
+ && ((i == 0 && type_info.integer_p)
+ || (i == 1 && type_info.float_p)))
+ return true;
+ return false;
+ }
+
+ bool
+ explicit_mode_suffix_p (enum predication_index,
+ enum mode_suffix_index) const override
+ {
+ return true;
+ }
+
+ void
+ build (function_builder &b, const function_group_info &group,
+ bool preserve_user_namespace) const override
+ {
+ b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+ b.add_overloaded_functions (group, MODE_n, preserve_user_namespace);
+ build_all (b, "v0,v1", group, MODE_none, preserve_user_namespace);
+ build_all (b, "v0,v1,su64", group, MODE_n, preserve_user_namespace);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ unsigned int i, nargs;
+ type_suffix_index from_type;
+ tree res;
+ unsigned int nimm = (r.mode_suffix_id == MODE_none) ? 0 : 1;
+
+ if (!r.check_gp_argument (1 + nimm, i, nargs)
+ || (from_type
+ = r.infer_vector_type (i - nimm)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ if (nimm > 0
+ && !r.require_integer_immediate (i))
+ return error_mark_node;
+
+ type_suffix_index to_type;
+
+ if (type_suffixes[from_type].integer_p)
+ {
+ to_type = find_type_suffix (TYPE_float,
+ type_suffixes[from_type].element_bits);
+ }
+ else
+ {
+ /* This should not happen: when 'from_type' is float, the type
+ suffixes are not overloaded (except for "m" predication,
+ handled above). */
+ gcc_assert (r.pred == PRED_m);
+
+ /* Get the return type from the 'inactive' argument. */
+ to_type = r.infer_vector_type (0);
+ }
+
+ if ((res = r.lookup_form (r.mode_suffix_id, to_type, from_type)))
+ return res;
+
+ return r.report_no_such_form (from_type);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ if (c.mode_suffix_id == MODE_none)
+ return true;
+
+ unsigned int bits = c.type_suffix (0).element_bits;
+ return c.require_immediate_range (1, 1, bits);
+ }
+};
+SHAPE (vcvt)
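
(Illustration only, using the vcvtq prototypes quoted above; assumes MVE with
floating point, e.g. +mve.fp.)

    #include <arm_mve.h>

    /* int -> float: the overloaded spelling is available here.  */
    float32x4_t to_float (int32x4_t a)
    {
      return vcvtq (a);                 /* Resolves to vcvtq_f32_s32.  */
    }

    /* float -> fixed point: both type suffixes must be spelled out, and the
       immediate must be in [1, 32], as the check function enforces.  */
    int32x4_t to_q16 (float32x4_t a)
    {
      return vcvtq_n_s32_f32 (a, 16);
    }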
+
+/* float16x8_t foo_f16_f32(float16x8_t, float32x4_t)
+
+ Example: vcvttq_f16_f32.
+ float16x8_t [__arm_]vcvttq_f16_f32(float16x8_t a, float32x4_t b)
+ float16x8_t [__arm_]vcvttq_m_f16_f32(float16x8_t a, float32x4_t b, mve_pred16_t p)
+*/
+struct vcvt_f16_f32_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group,
+ bool preserve_user_namespace) const override
+ {
+ build_all (b, "v0,v0,v1", group, MODE_none, preserve_user_namespace);
+ }
+};
+SHAPE (vcvt_f16_f32)
+
+/* float32x4_t foo_f32_f16(float16x8_t)
+
+ Example: vcvttq_f32_f16.
+ float32x4_t [__arm_]vcvttq_f32_f16(float16x8_t a)
+ float32x4_t [__arm_]vcvttq_m_f32_f16(float32x4_t inactive, float16x8_t a, mve_pred16_t p)
+ float32x4_t [__arm_]vcvttq_x_f32_f16(float16x8_t a, mve_pred16_t p)
+*/
+struct vcvt_f32_f16_def : public nonoverloaded_base
+{
+ void
+ build (function_builder &b, const function_group_info &group,
+ bool preserve_user_namespace) const override
+ {
+ build_all (b, "v0,v1", group, MODE_none, preserve_user_namespace);
+ }
+};
+SHAPE (vcvt_f32_f16)
+
+/* <T0>_t foo_t0[_t1](<T1>_t)
+
+ Example: vcvtaq.
+ int16x8_t [__arm_]vcvtaq_s16_f16(float16x8_t a)
+ int16x8_t [__arm_]vcvtaq_m[_s16_f16](int16x8_t inactive, float16x8_t a, mve_pred16_t p)
+ int16x8_t [__arm_]vcvtaq_x_s16_f16(float16x8_t a, mve_pred16_t p)
+*/
+struct vcvtx_def : public overloaded_base<0>
+{
+ bool
+ explicit_type_suffix_p (unsigned int, enum predication_index pred,
+ enum mode_suffix_index,
+ type_suffix_info) const override
+ {
+ return pred != PRED_m;
+ }
+
+ bool
+ skip_overload_p (enum predication_index pred, enum mode_suffix_index)
+ const override
+ {
+ return pred != PRED_m;
+ }
+
+ void
+ build (function_builder &b, const function_group_info &group,
+ bool preserve_user_namespace) const override
+ {
+ b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+ build_all (b, "v0,v1", group, MODE_none, preserve_user_namespace);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ unsigned int i, nargs;
+ type_suffix_index from_type;
+ tree res;
+
+ if (!r.check_gp_argument (1, i, nargs)
+ || (from_type
+ = r.infer_vector_type (i)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ type_suffix_index to_type;
+
+ gcc_assert (r.pred == PRED_m);
+
+ /* Get the return type from the 'inactive' argument. */
+ to_type = r.infer_vector_type (0);
+
+ if ((res = r.lookup_form (r.mode_suffix_id, to_type, from_type)))
+ return res;
+
+ return r.report_no_such_form (from_type);
+ }
+};
+SHAPE (vcvtx)
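
(Illustration only, from the vcvtaq prototypes above: only the _m form is
overloaded, so the unpredicated call spells out both suffixes.)

    #include <arm_mve.h>

    /* Convert float16 lanes to signed integers, rounding to nearest with
       ties away from zero.  */
    int16x8_t round_convert (float16x8_t a)
    {
      return vcvtaq_s16_f16 (a);
    }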
+
+/* <T0>_t vfoo[_n]_t0(uint32_t, const int)
+ <T0>_t vfoo[_wb]_t0(uint32_t *, const int)
+
+ Shape for vector increment or decrement and duplicate operations that take
+ an integer or pointer to integer first argument and an immediate, and
+ produce a vector.
+
+ Check that 'imm' is one of 1, 2, 4 or 8.
+
+ Example: vddupq.
+ uint8x16_t [__arm_]vddupq[_n]_u8(uint32_t a, const int imm)
+ uint8x16_t [__arm_]vddupq[_wb]_u8(uint32_t *a, const int imm)
+ uint8x16_t [__arm_]vddupq_m[_n_u8](uint8x16_t inactive, uint32_t a, const int imm, mve_pred16_t p)
+ uint8x16_t [__arm_]vddupq_m[_wb_u8](uint8x16_t inactive, uint32_t *a, const int imm, mve_pred16_t p)
+ uint8x16_t [__arm_]vddupq_x[_n]_u8(uint32_t a, const int imm, mve_pred16_t p)
+ uint8x16_t [__arm_]vddupq_x[_wb]_u8(uint32_t *a, const int imm, mve_pred16_t p) */
+struct viddup_def : public overloaded_base<0>
+{
+ bool
+ explicit_type_suffix_p (unsigned int i, enum predication_index pred,
+ enum mode_suffix_index,
+ type_suffix_info) const override
+ {
+ return ((i == 0) && (pred != PRED_m));
+ }
+
+ bool
+ skip_overload_p (enum predication_index, enum mode_suffix_index mode) const override
+ {
+ /* For MODE_wb, share the overloaded instance with MODE_n. */
+ if (mode == MODE_wb)
+ return true;
+
+ return false;
+ }
+
+ void
+ build (function_builder &b, const function_group_info &group,
+ bool preserve_user_namespace) const override
+ {
+ b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+ build_all (b, "v0,su32,su64", group, MODE_n, preserve_user_namespace);
+ build_all (b, "v0,as,su64", group, MODE_wb, preserve_user_namespace);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ unsigned int i, nargs;
+ type_suffix_index type_suffix = NUM_TYPE_SUFFIXES;
+ if (!r.check_gp_argument (2, i, nargs))
+ return error_mark_node;
+
+ type_suffix = r.type_suffix_ids[0];
+    /* With PRED_m, there is no type suffix, so infer it from the first
+       (inactive) argument.  */
+ if (type_suffix == NUM_TYPE_SUFFIXES)
+ type_suffix = r.infer_vector_type (0);
+
+ unsigned int last_arg = i - 1;
+ /* Check that last_arg is either scalar or pointer. */
+ if (!r.scalar_argument_p (last_arg))
+ return error_mark_node;
+
+ if (!r.require_integer_immediate (last_arg + 1))
+ return error_mark_node;
+
+ /* With MODE_n we expect a scalar, with MODE_wb we expect a pointer. */
+ mode_suffix_index mode_suffix;
+ if (POINTER_TYPE_P (r.get_argument_type (last_arg)))
+ mode_suffix = MODE_wb;
+ else
+ mode_suffix = MODE_n;
+
+ return r.resolve_to (mode_suffix, type_suffix);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_one_of (1, 1, 2, 4, 8);
+ }
+};
+SHAPE (viddup)
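
(Illustration only, using the vddupq prototypes quoted above.  The resolver
picks the _n or _wb form from the type of the first argument.)

    #include <arm_mve.h>

    /* _n form: start value passed by value; the step must be 1, 2, 4 or 8.  */
    uint8x16_t dec_dup (uint32_t start)
    {
      return vddupq_u8 (start, 4);
    }

    /* _wb form: start value passed by pointer and written back.  */
    uint8x16_t dec_dup_wb (uint32_t *start)
    {
      return vddupq_u8 (start, 4);
    }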
+
+/* <T0>_t vfoo[_n]_t0(uint32_t, uint32_t, const int)
+ <T0>_t vfoo[_wb]_t0(uint32_t *, uint32_t, const int)
+
+ Shape for vector increment or decrement with wrap and duplicate operations
+ that take an integer or pointer to integer first argument, an integer second
+ argument and an immediate, and produce a vector.
+
+ Check that 'imm' is one of 1, 2, 4 or 8.
+
+ Example: vdwdupq.
+ uint8x16_t [__arm_]vdwdupq[_n]_u8(uint32_t a, uint32_t b, const int imm)
+ uint8x16_t [__arm_]vdwdupq[_wb]_u8(uint32_t *a, uint32_t b, const int imm)
+ uint8x16_t [__arm_]vdwdupq_m[_n_u8](uint8x16_t inactive, uint32_t a, uint32_t b, const int imm, mve_pred16_t p)
+ uint8x16_t [__arm_]vdwdupq_m[_wb_u8](uint8x16_t inactive, uint32_t *a, uint32_t b, const int imm, mve_pred16_t p)
+ uint8x16_t [__arm_]vdwdupq_x[_n]_u8(uint32_t a, uint32_t b, const int imm, mve_pred16_t p)
+ uint8x16_t [__arm_]vdwdupq_x[_wb]_u8(uint32_t *a, uint32_t b, const int imm, mve_pred16_t p) */
+struct vidwdup_def : public overloaded_base<0>
+{
+ bool
+ explicit_type_suffix_p (unsigned int i, enum predication_index pred,
+ enum mode_suffix_index,
+ type_suffix_info) const override
+ {
+ return ((i == 0) && (pred != PRED_m));
+ }
+
+ bool
+ skip_overload_p (enum predication_index, enum mode_suffix_index mode) const override
+ {
+ /* For MODE_wb, share the overloaded instance with MODE_n. */
+ if (mode == MODE_wb)
+ return true;
+
+ return false;
+ }
+
+ void
+ build (function_builder &b, const function_group_info &group,
+ bool preserve_user_namespace) const override
+ {
+ b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+ build_all (b, "v0,su32,su32,su64", group, MODE_n, preserve_user_namespace);
+ build_all (b, "v0,as,su32,su64", group, MODE_wb, preserve_user_namespace);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ unsigned int i, nargs;
+ type_suffix_index type_suffix = NUM_TYPE_SUFFIXES;
+ if (!r.check_gp_argument (3, i, nargs))
+ return error_mark_node;
+
+ type_suffix = r.type_suffix_ids[0];
+    /* With PRED_m, there is no type suffix, so infer it from the first
+       (inactive) argument.  */
+ if (type_suffix == NUM_TYPE_SUFFIXES)
+ type_suffix = r.infer_vector_type (0);
+
+ unsigned int last_arg = i - 2;
+ /* Check that last_arg is either scalar or pointer. */
+ if (!r.scalar_argument_p (last_arg))
+ return error_mark_node;
+
+ if (!r.scalar_argument_p (last_arg + 1))
+ return error_mark_node;
+
+ if (!r.require_integer_immediate (last_arg + 2))
+ return error_mark_node;
+
+ /* With MODE_n we expect a scalar, with MODE_wb we expect a pointer. */
+ mode_suffix_index mode_suffix;
+ if (POINTER_TYPE_P (r.get_argument_type (last_arg)))
+ mode_suffix = MODE_wb;
+ else
+ mode_suffix = MODE_n;
+
+ return r.resolve_to (mode_suffix, type_suffix);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_one_of (2, 1, 2, 4, 8);
+ }
+};
+SHAPE (vidwdup)
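
(Likewise for vidwdup, illustration only; vdwdupq prototypes above.)

    #include <arm_mve.h>

    /* Decrement by 2 and duplicate, wrapping at WRAP; again the _n/_wb
       choice follows from the type of the first argument.  */
    uint8x16_t dec_wrap_dup (uint32_t start, uint32_t wrap)
    {
      return vdwdupq_u8 (start, wrap, 2);
    }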
+
/* <T0>_t vfoo[_t0](<T0>_t, <T0>_t, mve_pred16_t)
i.e. a version of the standard ternary shape in which
@@ -2019,6 +2500,50 @@ struct vpsel_def : public overloaded_base<0>
};
SHAPE (vpsel)
+/* <T0>_t vfoo[_t0](T0, uint32_t *, const int)
+
+ Check that 'imm' is in [1..32].
+
+ Example: vshlcq.
+ uint8x16_t [__arm_]vshlcq[_u8](uint8x16_t a, uint32_t *b, const int imm)
+ uint8x16_t [__arm_]vshlcq_m[_u8](uint8x16_t a, uint32_t *b, const int imm, mve_pred16_t p) */
+struct vshlc_def : public overloaded_base<0>
+{
+ void
+ build (function_builder &b, const function_group_info &group,
+ bool preserve_user_namespace) const override
+ {
+ b.add_overloaded_functions (group, MODE_none, preserve_user_namespace);
+ build_all (b, "v0,v0,as,su64", group, MODE_none, preserve_user_namespace);
+ }
+
+ tree
+ resolve (function_resolver &r) const override
+ {
+ unsigned int i, nargs;
+ type_suffix_index type;
+ if (!r.check_gp_argument (3, i, nargs)
+ || (type = r.infer_vector_type (0)) == NUM_TYPE_SUFFIXES)
+ return error_mark_node;
+
+ /* Check that arg #2 is a pointer. */
+ if (!POINTER_TYPE_P (r.get_argument_type (i - 1)))
+ return error_mark_node;
+
+ if (!r.require_integer_immediate (i))
+ return error_mark_node;
+
+ return r.resolve_to (r.mode_suffix_id, type);
+ }
+
+ bool
+ check (function_checker &c) const override
+ {
+ return c.require_immediate_range (2, 1, 32);
+ }
+};
+SHAPE (vshlc)
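
(Illustration only, from the vshlcq prototype above.)

    #include <arm_mve.h>

    /* Whole-vector shift left with carry: bits shifted in at the bottom
       come from *CARRY, and the bits shifted out are written back to it.
       The immediate must be in [1, 32].  */
    uint8x16_t shift_with_carry (uint8x16_t a, uint32_t *carry)
    {
      return vshlcq (a, carry, 8);
    }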
+
} /* end namespace arm_mve */
#undef SHAPE
diff --git a/gcc/config/arm/arm-mve-builtins-shapes.h b/gcc/config/arm/arm-mve-builtins-shapes.h
index 61aa4fa..db7c631 100644
--- a/gcc/config/arm/arm-mve-builtins-shapes.h
+++ b/gcc/config/arm/arm-mve-builtins-shapes.h
@@ -62,6 +62,7 @@ namespace arm_mve
extern const function_shape *const create;
extern const function_shape *const inherent;
extern const function_shape *const load;
+ extern const function_shape *const load_ext;
extern const function_shape *const mvn;
extern const function_shape *const store;
extern const function_shape *const ternary;
@@ -77,7 +78,16 @@ namespace arm_mve
extern const function_shape *const unary_n;
extern const function_shape *const unary_widen;
extern const function_shape *const unary_widen_acc;
+ extern const function_shape *const vadc_vsbc;
+ extern const function_shape *const vctp;
+ extern const function_shape *const vcvt;
+ extern const function_shape *const vcvt_f16_f32;
+ extern const function_shape *const vcvt_f32_f16;
+ extern const function_shape *const vcvtx;
+ extern const function_shape *const viddup;
+ extern const function_shape *const vidwdup;
extern const function_shape *const vpsel;
+ extern const function_shape *const vshlc;
} /* end namespace arm_mve::shapes */
} /* end namespace arm_mve */
diff --git a/gcc/config/arm/arm-mve-builtins.cc b/gcc/config/arm/arm-mve-builtins.cc
index 7e82176..af19086 100644
--- a/gcc/config/arm/arm-mve-builtins.cc
+++ b/gcc/config/arm/arm-mve-builtins.cc
@@ -19,6 +19,7 @@
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -149,8 +150,10 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
class ("b", "f", etc.) and a numerical bit count. */
/* _f16. */
-#define TYPES_float16(S, D) \
- S (f16)
+#define TYPES_float_16(S, D) S (f16)
+
+/* _f32. */
+#define TYPES_float_32(S, D) S (f32)
/* _f16 _f32. */
#define TYPES_all_float(S, D) \
@@ -205,6 +208,36 @@ CONSTEXPR const type_suffix_info type_suffixes[NUM_TYPE_SUFFIXES + 1] = {
#define TYPES_signed_32(S, D) \
S (s32)
+/* All the type combinations allowed by vcvtq. */
+#define TYPES_cvt(S, D) \
+ D (f16, s16), \
+ D (f16, u16), \
+ \
+ D (f32, s32), \
+ D (f32, u32), \
+ \
+ D (s16, f16), \
+ D (s32, f32), \
+ \
+ D (u16, f16), \
+ D (u32, f32)
+
+/* vcvt[bt]q_f16_f32. */
+#define TYPES_cvt_f16_f32(S, D) \
+ D (f16, f32)
+
+/* vcvt[bt]q_f32_f16. */
+#define TYPES_cvt_f32_f16(S, D) \
+ D (f32, f16)
+
+/* All the type combinations allowed by vcvtXq. */
+#define TYPES_cvtx(S, D) \
+ D (s16, f16), \
+ D (s32, f32), \
+ \
+ D (u16, f16), \
+ D (u32, f32)
+
#define TYPES_reinterpret_signed1(D, A) \
D (A, s8), D (A, s16), D (A, s32), D (A, s64)
@@ -273,7 +306,8 @@ static const type_suffix_pair types_none[] = {
DEF_MVE_TYPES_ARRAY (all_integer);
DEF_MVE_TYPES_ARRAY (all_integer_with_64);
-DEF_MVE_TYPES_ARRAY (float16);
+DEF_MVE_TYPES_ARRAY (float_16);
+DEF_MVE_TYPES_ARRAY (float_32);
DEF_MVE_TYPES_ARRAY (all_float);
DEF_MVE_TYPES_ARRAY (all_signed);
DEF_MVE_TYPES_ARRAY (all_unsigned);
@@ -284,6 +318,10 @@ DEF_MVE_TYPES_ARRAY (integer_32);
DEF_MVE_TYPES_ARRAY (poly_8_16);
DEF_MVE_TYPES_ARRAY (signed_16_32);
DEF_MVE_TYPES_ARRAY (signed_32);
+DEF_MVE_TYPES_ARRAY (cvt);
+DEF_MVE_TYPES_ARRAY (cvt_f16_f32);
+DEF_MVE_TYPES_ARRAY (cvt_f32_f16);
+DEF_MVE_TYPES_ARRAY (cvtx);
DEF_MVE_TYPES_ARRAY (reinterpret_integer);
DEF_MVE_TYPES_ARRAY (reinterpret_float);
@@ -308,6 +346,11 @@ static const predication_index preds_p_or_none[] = {
PRED_p, PRED_none, NUM_PREDS
};
+/* Used by functions that have the z predicated form, in addition to
+ an unpredicated form. */
+static const predication_index preds_z_or_none[]
+ = {PRED_z, PRED_none, NUM_PREDS};
+
/* A list of all MVE ACLE functions. */
static CONSTEXPR const function_group_info function_groups[] = {
#define DEF_MVE_FUNCTION(NAME, SHAPE, TYPES, PREDS) \
@@ -596,6 +639,20 @@ report_not_enum (location_t location, tree fndecl, unsigned int argno,
" a valid %qT value", actual, argno + 1, fndecl, enumtype);
}
+/* Report that LOCATION has a call to FNDECL in which argument ARGNO has
+ the value ACTUAL, whereas the function requires one of VALUE0..3.
+ ARGNO counts from zero. */
+static void
+report_not_one_of (location_t location, tree fndecl, unsigned int argno,
+ HOST_WIDE_INT actual, HOST_WIDE_INT value0,
+ HOST_WIDE_INT value1, HOST_WIDE_INT value2,
+ HOST_WIDE_INT value3)
+{
+ error_at (location, "passing %wd to argument %d of %qE, which expects"
+ " %wd, %wd, %wd or %wd", actual, argno + 1, fndecl, value0, value1,
+ value2, value3);
+}
+
/* Checks that the mve.fp extension is enabled, given that REQUIRES_FLOAT
indicates whether it is required or not for function FNDECL.
Report an error against LOCATION if not. */
@@ -703,6 +760,7 @@ function_instance::has_inactive_argument () const
return false;
if (mode_suffix_id == MODE_r
+ || (base == functions::vbicq && mode_suffix_id == MODE_n)
|| base == functions::vcmlaq
|| base == functions::vcmlaq_rot90
|| base == functions::vcmlaq_rot180
@@ -715,6 +773,12 @@ function_instance::has_inactive_argument () const
|| base == functions::vcmpltq
|| base == functions::vcmpcsq
|| base == functions::vcmphiq
+ || base == functions::vctp16q
+ || base == functions::vctp32q
+ || base == functions::vctp64q
+ || base == functions::vctp8q
+ || (base == functions::vcvtbq && type_suffix (0).element_bits == 16)
+ || (base == functions::vcvttq && type_suffix (0).element_bits == 16)
|| base == functions::vfmaq
|| base == functions::vfmasq
|| base == functions::vfmsq
@@ -755,6 +819,7 @@ function_instance::has_inactive_argument () const
|| (base == functions::vrshlq && mode_suffix_id == MODE_n)
|| base == functions::vrshrnbq
|| base == functions::vrshrntq
+ || base == functions::vshlcq
|| base == functions::vshrnbq
|| base == functions::vshrntq
|| base == functions::vsliq
@@ -823,7 +888,8 @@ function_builder::get_name (const function_instance &instance,
for (unsigned int i = 0; i < 2; ++i)
if (!overloaded_p
|| instance.shape->explicit_type_suffix_p (i, instance.pred,
- instance.mode_suffix_id))
+ instance.mode_suffix_id,
+ instance.type_suffix (i)))
append_name (instance.type_suffix (i).string);
return finish_name ();
}
@@ -1001,9 +1067,11 @@ function_builder::add_overloaded_functions (const function_group_info &group,
for (unsigned int pi = 0; group.preds[pi] != NUM_PREDS; ++pi)
{
unsigned int explicit_type0
- = (*group.shape)->explicit_type_suffix_p (0, group.preds[pi], mode);
+ = (*group.shape)->explicit_type_suffix_p (0, group.preds[pi], mode,
+ type_suffixes[NUM_TYPE_SUFFIXES]);
unsigned int explicit_type1
- = (*group.shape)->explicit_type_suffix_p (1, group.preds[pi], mode);
+ = (*group.shape)->explicit_type_suffix_p (1, group.preds[pi], mode,
+ type_suffixes[NUM_TYPE_SUFFIXES]);
if ((*group.shape)->skip_overload_p (group.preds[pi], mode))
continue;
@@ -1601,6 +1669,7 @@ function_resolver::check_gp_argument (unsigned int nops,
case PRED_p:
case PRED_x:
+ case PRED_z:
/* Add final predicate. */
nargs = nops + 1;
break;
@@ -1925,6 +1994,36 @@ function_checker::require_immediate_enum (unsigned int rel_argno, tree type)
return false;
}
+/* Check that argument REL_ARGNO is an integer constant expression that
+ has one of the given values. */
+bool
+function_checker::require_immediate_one_of (unsigned int rel_argno,
+ HOST_WIDE_INT value0,
+ HOST_WIDE_INT value1,
+ HOST_WIDE_INT value2,
+ HOST_WIDE_INT value3)
+{
+ unsigned int argno = m_base_arg + rel_argno;
+ if (!argument_exists_p (argno))
+ return true;
+
+ HOST_WIDE_INT actual;
+ if (!require_immediate (argno, actual))
+ return false;
+
+ if (actual != value0
+ && actual != value1
+ && actual != value2
+ && actual != value3)
+ {
+ report_not_one_of (location, fndecl, argno, actual,
+ value0, value1, value2, value3);
+ return false;
+ }
+
+ return true;
+}
+
/* Check that argument REL_ARGNO is an integer constant expression in the
range [MIN, MAX]. REL_ARGNO counts from the end of the predication
arguments. */
@@ -2237,6 +2336,8 @@ function_expander::use_contiguous_load_insn (insn_code icode)
add_output_operand (icode);
add_mem_operand (mem_mode, get_contiguous_base ());
+ if (pred == PRED_z)
+ add_input_operand (icode, args[1]);
return generate_insn (icode);
}
@@ -2249,6 +2350,8 @@ function_expander::use_contiguous_store_insn (insn_code icode)
add_mem_operand (mem_mode, get_contiguous_base ());
add_input_operand (icode, args[1]);
+ if (pred == PRED_p)
+ add_input_operand (icode, args[2]);
return generate_insn (icode);
}
diff --git a/gcc/config/arm/arm-mve-builtins.def b/gcc/config/arm/arm-mve-builtins.def
index 24ebb33..265cc7b 100644
--- a/gcc/config/arm/arm-mve-builtins.def
+++ b/gcc/config/arm/arm-mve-builtins.def
@@ -36,6 +36,7 @@
DEF_MVE_MODE (n, none, none, none)
DEF_MVE_MODE (offset, none, none, bytes)
DEF_MVE_MODE (r, none, none, none)
+DEF_MVE_MODE (wb, none, none, none)
#define REQUIRES_FLOAT false
DEF_MVE_TYPE (mve_pred16_t, boolean_type_node)
diff --git a/gcc/config/arm/arm-mve-builtins.h b/gcc/config/arm/arm-mve-builtins.h
index f282236..2e48d91 100644
--- a/gcc/config/arm/arm-mve-builtins.h
+++ b/gcc/config/arm/arm-mve-builtins.h
@@ -401,7 +401,7 @@ public:
bool require_integer_immediate (unsigned int);
bool require_derived_scalar_type (unsigned int, type_class_index,
unsigned int = SAME_SIZE);
-
+
bool check_num_arguments (unsigned int);
bool check_gp_argument (unsigned int, unsigned int &, unsigned int &);
tree resolve_unary (type_class_index = SAME_TYPE_CLASS,
@@ -433,6 +433,8 @@ public:
bool require_immediate_enum (unsigned int, tree);
bool require_immediate_lane_index (unsigned int, unsigned int = 1);
+ bool require_immediate_one_of (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT,
+ HOST_WIDE_INT, HOST_WIDE_INT);
bool require_immediate_range (unsigned int, HOST_WIDE_INT, HOST_WIDE_INT);
bool check ();
@@ -571,9 +573,13 @@ public:
class function_shape
{
public:
- virtual bool explicit_type_suffix_p (unsigned int, enum predication_index, enum mode_suffix_index) const = 0;
- virtual bool explicit_mode_suffix_p (enum predication_index, enum mode_suffix_index) const = 0;
- virtual bool skip_overload_p (enum predication_index, enum mode_suffix_index) const = 0;
+ virtual bool explicit_type_suffix_p (unsigned int, enum predication_index,
+ enum mode_suffix_index,
+ type_suffix_info) const = 0;
+ virtual bool explicit_mode_suffix_p (enum predication_index,
+ enum mode_suffix_index) const = 0;
+ virtual bool skip_overload_p (enum predication_index,
+ enum mode_suffix_index) const = 0;
/* Define all functions associated with the given group. */
virtual void build (function_builder &,
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 50cae2b..7311ad4 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -210,6 +210,7 @@ extern bool arm_pad_reg_upward (machine_mode, tree, int);
#endif
extern int arm_apply_result_size (void);
extern opt_machine_mode arm_get_mask_mode (machine_mode mode);
+extern bool arm_noce_conversion_profitable_p (rtx_insn *,struct noce_if_info *);
#endif /* RTX_CODE */
@@ -615,4 +616,7 @@ void arm_initialize_isa (sbitmap, const enum isa_feature *);
const char * arm_gen_far_branch (rtx *, int, const char * , const char *);
bool arm_mve_immediate_check(rtx, machine_mode, bool);
+
+opt_machine_mode arm_mve_data_mode (scalar_mode, poly_uint64);
+
#endif /* ! GCC_ARM_PROTOS_H */
diff --git a/gcc/config/arm/arm.cc b/gcc/config/arm/arm.cc
index d54564a..6f11b6c 100644
--- a/gcc/config/arm/arm.cc
+++ b/gcc/config/arm/arm.cc
@@ -23,6 +23,7 @@
#define IN_TARGET_CODE 1
#include "config.h"
+#define INCLUDE_MEMORY
#define INCLUDE_STRING
#include "system.h"
#include "coretypes.h"
@@ -75,6 +76,7 @@
#include "opts.h"
#include "aarch-common.h"
#include "aarch-common-protos.h"
+#include "machmode.h"
/* This file should be included last. */
#include "target-def.h"
@@ -814,6 +816,9 @@ static const scoped_attribute_specs *const arm_attribute_table[] =
#undef TARGET_MODES_TIEABLE_P
#define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
+#undef TARGET_NOCE_CONVERSION_PROFITABLE_P
+#define TARGET_NOCE_CONVERSION_PROFITABLE_P arm_noce_conversion_profitable_p
+
#undef TARGET_CAN_CHANGE_MODE_CLASS
#define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
@@ -8006,10 +8011,11 @@ arm_function_ok_for_sibcall (tree decl, tree exp)
&& DECL_WEAK (decl))
return false;
- /* We cannot tailcall an indirect call by descriptor if all the call-clobbered
- general registers are live (r0-r3 and ip). This can happen when:
- - IP contains the static chain, or
- - IP is needed for validating the PAC signature. */
+ /* Indirect tailcalls need a call-clobbered register to hold the function
+ address. But we only have r0-r3 and ip in that class. If r0-r3 all hold
+ function arguments, then we can only use IP. But IP may be needed in the
+ epilogue (for PAC validation), or for passing the static chain. We have
+ to disable the tail call if nothing is available. */
if (!decl
&& ((CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
|| arm_current_function_pac_enabled_p()))
@@ -8021,18 +8027,33 @@ arm_function_ok_for_sibcall (tree decl, tree exp)
arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
cum_v = pack_cumulative_args (&cum);
- for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
+ tree arg;
+ call_expr_arg_iterator iter;
+ unsigned used_regs = 0;
+
+ /* Layout each actual argument in turn. If it is allocated to
+ core regs, note which regs have been allocated. */
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
{
- tree type = TREE_VALUE (t);
- if (!VOID_TYPE_P (type))
+ tree type = TREE_TYPE (arg);
+ function_arg_info arg_info (type, /*named=*/true);
+ rtx reg = arm_function_arg (cum_v, arg_info);
+ if (reg && REG_P (reg)
+ && REGNO (reg) <= LAST_ARG_REGNUM)
{
- function_arg_info arg (type, /*named=*/true);
- arm_function_arg_advance (cum_v, arg);
+ /* Avoid any chance of UB here. We don't care if TYPE
+ is very large since it will use up all the argument regs. */
+ unsigned nregs = MIN (ARM_NUM_REGS2 (GET_MODE (reg), type),
+ LAST_ARG_REGNUM + 1);
+ used_regs |= ((1 << nregs) - 1) << REGNO (reg);
}
+ arm_function_arg_advance (cum_v, arg_info);
}
- function_arg_info arg (integer_type_node, /*named=*/true);
- if (!arm_function_arg (cum_v, arg))
+  /* If r0-r3 are all taken by arguments and we already know IP is needed
+     in the epilogue, no call-clobbered register is left to hold the call
+     address, so we can't tailcall.  */
+ if ((used_regs & ((1 << (LAST_ARG_REGNUM + 1)) - 1))
+ == ((1 << (LAST_ARG_REGNUM + 1)) - 1))
return false;
}
@@ -11891,7 +11912,7 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
case CONST_DOUBLE:
if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
- && (mode == SFmode || !TARGET_VFP_SINGLE))
+ && (mode == SFmode || mode == HFmode || !TARGET_VFP_SINGLE))
{
if (vfp3_const_double_rtx (x))
{
@@ -11916,12 +11937,18 @@ arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
return true;
case CONST_VECTOR:
- /* Fixme. */
if (((TARGET_NEON && TARGET_HARD_FLOAT
&& (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode)))
|| TARGET_HAVE_MVE)
&& simd_immediate_valid_for_move (x, mode, NULL, NULL))
*cost = COSTS_N_INSNS (1);
+ else if (TARGET_HAVE_MVE)
+ {
+ /* 128-bit vector requires two vldr.64 on MVE. */
+ *cost = COSTS_N_INSNS (2);
+ if (speed_p)
+ *cost += extra_cost->ldst.loadd * 2;
+ }
else
*cost = COSTS_N_INSNS (4);
return true;
@@ -15341,9 +15368,9 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
HOST_WIDE_INT srcoffset, dstoffset;
HOST_WIDE_INT src_autoinc, dst_autoinc;
rtx mem, addr;
-
+
gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
-
+
/* Use hard registers if we have aligned source or destination so we can use
load/store multiple with contiguous registers. */
if (dst_aligned || src_aligned)
@@ -15357,7 +15384,7 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
src = copy_addr_to_reg (XEXP (srcbase, 0));
srcoffset = dstoffset = 0;
-
+
/* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
For copying the last bytes we want to subtract this offset again. */
src_autoinc = dst_autoinc = 0;
@@ -15411,14 +15438,14 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
remaining -= block_size_bytes;
}
-
+
/* Copy any whole words left (note these aren't interleaved with any
subsequent halfword/byte load/stores in the interests of simplicity). */
-
+
words = remaining / UNITS_PER_WORD;
gcc_assert (words < interleave_factor);
-
+
if (src_aligned && words > 1)
{
emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
@@ -15464,11 +15491,11 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
}
remaining -= words * UNITS_PER_WORD;
-
+
gcc_assert (remaining < 4);
-
+
/* Copy a halfword if necessary. */
-
+
if (remaining >= 2)
{
halfword_tmp = gen_reg_rtx (SImode);
@@ -15492,11 +15519,11 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
remaining -= 2;
srcoffset += 2;
}
-
+
gcc_assert (remaining < 2);
-
+
/* Copy last byte. */
-
+
if ((remaining & 1) != 0)
{
byte_tmp = gen_reg_rtx (SImode);
@@ -15517,9 +15544,9 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
remaining--;
srcoffset++;
}
-
+
/* Store last halfword if we haven't done so already. */
-
+
if (halfword_tmp)
{
addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
@@ -15538,7 +15565,7 @@ arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
dstoffset++;
}
-
+
gcc_assert (remaining == 0 && srcoffset == dstoffset);
}
@@ -15557,7 +15584,7 @@ arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
rtx *loop_mem)
{
*loop_reg = copy_addr_to_reg (XEXP (mem, 0));
-
+
/* Although the new mem does not refer to a known location,
it does keep up to LENGTH bytes of alignment. */
*loop_mem = change_address (mem, BLKmode, *loop_reg);
@@ -15577,14 +15604,14 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
{
rtx src_reg, dest_reg, final_src, test;
HOST_WIDE_INT leftover;
-
+
leftover = length % bytes_per_iter;
length -= leftover;
-
+
/* Create registers and memory references for use within the loop. */
arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
-
+
/* Calculate the value that SRC_REG should have after the last iteration of
the loop. */
final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
@@ -15593,7 +15620,7 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
/* Emit the start of the loop. */
rtx_code_label *label = gen_label_rtx ();
emit_label (label);
-
+
/* Emit the loop body. */
arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
interleave_factor);
@@ -15601,11 +15628,11 @@ arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
/* Move on to the next block. */
emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
-
+
/* Emit the loop condition. */
test = gen_rtx_NE (VOIDmode, src_reg, final_src);
emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
-
+
/* Mop up any left-over bytes. */
if (leftover)
arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
@@ -15619,7 +15646,7 @@ static int
arm_cpymemqi_unaligned (rtx *operands)
{
HOST_WIDE_INT length = INTVAL (operands[2]);
-
+
if (optimize_size)
{
bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
@@ -15630,7 +15657,7 @@ arm_cpymemqi_unaligned (rtx *operands)
resulting code can be smaller. */
unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
-
+
if (length > 12)
arm_block_move_unaligned_loop (operands[0], operands[1], length,
interleave_factor, bytes_per_iter);
@@ -15648,7 +15675,7 @@ arm_cpymemqi_unaligned (rtx *operands)
else
arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
}
-
+
return 1;
}
@@ -24710,11 +24737,11 @@ arm_print_operand (FILE *stream, rtx x, int code)
asm_fprintf (stream, "[%r", REGNO (XEXP (addr, 0)));
inc_val = GET_MODE_SIZE (GET_MODE (x));
if (code == POST_INC || code == POST_DEC)
- asm_fprintf (stream, "], #%s%d",(code == POST_INC)
- ? "": "-", inc_val);
+ asm_fprintf (stream, "], #%s%d", (code == POST_INC)
+ ? "" : "-", inc_val);
else
- asm_fprintf (stream, ", #%s%d]!",(code == PRE_INC)
- ? "": "-", inc_val);
+ asm_fprintf (stream, ", #%s%d]!", (code == PRE_INC)
+ ? "" : "-", inc_val);
}
else if (code == POST_MODIFY || code == PRE_MODIFY)
{
@@ -24723,9 +24750,9 @@ arm_print_operand (FILE *stream, rtx x, int code)
if (postinc_reg && CONST_INT_P (postinc_reg))
{
if (code == POST_MODIFY)
- asm_fprintf (stream, "], #%wd",INTVAL (postinc_reg));
+ asm_fprintf (stream, "], #%wd", INTVAL (postinc_reg));
else
- asm_fprintf (stream, ", #%wd]!",INTVAL (postinc_reg));
+ asm_fprintf (stream, ", #%wd]!", INTVAL (postinc_reg));
}
}
else if (code == PLUS)
@@ -31139,10 +31166,10 @@ int
vfp3_const_double_for_fract_bits (rtx operand)
{
REAL_VALUE_TYPE r0;
-
+
if (!CONST_DOUBLE_P (operand))
return 0;
-
+
r0 = *CONST_DOUBLE_REAL_VALUE (operand);
if (exact_real_inverse (DFmode, &r0)
&& !REAL_VALUE_NEGATIVE (r0))
@@ -32404,7 +32431,7 @@ arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
else
return false;
}
-
+
return true;
case ARM_POST_DEC:
@@ -32421,10 +32448,10 @@ arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
return false;
return true;
-
+
default:
return false;
-
+
}
return false;
@@ -32435,7 +32462,7 @@ arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
Additionally, the default expansion code is not available or suitable
for post-reload insn splits (this can occur when the register allocator
chooses not to do a shift in NEON).
-
+
This function is used in both initial expand and post-reload splits, and
handles all kinds of 64-bit shifts.
@@ -33505,7 +33532,7 @@ arm_asan_shadow_offset (void)
/* This is a temporary fix for PR60655. Ideally we need
to handle most of these cases in the generic part but
- currently we reject minus (..) (sym_ref). We try to
+ currently we reject minus (..) (sym_ref). We try to
ameliorate the case with minus (sym_ref1) (sym_ref2)
where they are in the same section. */
@@ -33828,7 +33855,7 @@ arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
return build_target_option_node (opts, opts_set);
}
-static void
+static void
add_attribute (const char * mode, tree *attributes)
{
size_t len = strlen (mode);
@@ -33859,7 +33886,7 @@ arm_insert_attributes (tree fndecl, tree * attributes)
/* Nested definitions must inherit mode. */
if (current_function_decl)
{
- mode = TARGET_THUMB ? "thumb" : "arm";
+ mode = TARGET_THUMB ? "thumb" : "arm";
add_attribute (mode, attributes);
return;
}
@@ -35214,6 +35241,32 @@ arm_mve_dlstp_check_inc_counter (loop *loop, rtx_insn* vctp_insn,
return vctp_insn;
}
+/* Helper function to 'arm_mve_dlstp_check_dec_counter' to make sure DEC_INSN
+ is of the expected form:
+ (set (reg a) (plus (reg a) (const_int)))
+ where (reg a) is the same as CONDCOUNT.
+   Return an rtx with the set if it is in the right format or NULL_RTX
+ otherwise. */
+
+static rtx
+check_dec_insn (rtx_insn *dec_insn, rtx condcount)
+{
+ if (!NONDEBUG_INSN_P (dec_insn))
+ return NULL_RTX;
+ rtx dec_set = single_set (dec_insn);
+ if (!dec_set
+ || !REG_P (SET_DEST (dec_set))
+ || GET_CODE (SET_SRC (dec_set)) != PLUS
+ || !REG_P (XEXP (SET_SRC (dec_set), 0))
+ || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
+ || REGNO (SET_DEST (dec_set))
+ != REGNO (XEXP (SET_SRC (dec_set), 0))
+ || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
+ return NULL_RTX;
+
+ return dec_set;
+}
+
/* Helper function to `arm_mve_loop_valid_for_dlstp`. In the case of a
counter that is decrementing, ensure that it is decrementing by the
right amount in each iteration and that the target condition is what
@@ -35230,30 +35283,19 @@ arm_mve_dlstp_check_dec_counter (loop *loop, rtx_insn* vctp_insn,
loop latch. Here we simply need to verify that this counter is the same
reg that is also used in the vctp_insn and that it is not otherwise
modified. */
- rtx_insn *dec_insn = BB_END (loop->latch);
+ rtx dec_set = check_dec_insn (BB_END (loop->latch), condcount);
/* If not in the loop latch, try to find the decrement in the loop header. */
- if (!NONDEBUG_INSN_P (dec_insn))
+ if (dec_set == NULL_RTX)
{
df_ref temp = df_bb_regno_only_def_find (loop->header, REGNO (condcount));
/* If we haven't been able to find the decrement, bail out. */
if (!temp)
return NULL;
- dec_insn = DF_REF_INSN (temp);
- }
+ dec_set = check_dec_insn (DF_REF_INSN (temp), condcount);
- rtx dec_set = single_set (dec_insn);
-
- /* Next, ensure that it is a PLUS of the form:
- (set (reg a) (plus (reg a) (const_int)))
- where (reg a) is the same as condcount. */
- if (!dec_set
- || !REG_P (SET_DEST (dec_set))
- || !REG_P (XEXP (SET_SRC (dec_set), 0))
- || !CONST_INT_P (XEXP (SET_SRC (dec_set), 1))
- || REGNO (SET_DEST (dec_set))
- != REGNO (XEXP (SET_SRC (dec_set), 0))
- || REGNO (SET_DEST (dec_set)) != REGNO (condcount))
- return NULL;
+ if (dec_set == NULL_RTX)
+ return NULL;
+ }
decrementnum = INTVAL (XEXP (SET_SRC (dec_set), 1));
@@ -36057,6 +36099,90 @@ arm_get_mask_mode (machine_mode mode)
return default_get_mask_mode (mode);
}
+/* Helper function to determine whether SEQ represents a sequence of
+ instructions representing the Armv8.1-M Mainline conditional arithmetic
+ instructions: csinc, csneg and csinv. The cinc instruction is generated
+ using a different mechanism. */
+
+static bool
+arm_is_v81m_cond_insn (rtx_insn *seq)
+{
+ rtx_insn *curr_insn = seq;
+ rtx set = NULL_RTX;
+ /* The pattern may start with a simple set with register operands. Skip
+ through any of those. */
+ while (curr_insn)
+ {
+ set = single_set (curr_insn);
+ if (!set
+ || !REG_P (SET_DEST (set)))
+ return false;
+
+ if (!REG_P (SET_SRC (set)))
+ break;
+ curr_insn = NEXT_INSN (curr_insn);
+ }
+
+ if (!set)
+ return false;
+
+ /* The next instruction should be one of:
+ NEG: for csneg,
+ PLUS: for csinc,
+ NOT: for csinv. */
+ if (GET_CODE (SET_SRC (set)) != NEG
+ && GET_CODE (SET_SRC (set)) != PLUS
+ && GET_CODE (SET_SRC (set)) != NOT)
+ return false;
+
+ curr_insn = NEXT_INSN (curr_insn);
+ if (!curr_insn)
+ return false;
+
+ /* The next instruction should be a COMPARE. */
+ set = single_set (curr_insn);
+ if (!set
+ || !REG_P (SET_DEST (set))
+ || GET_CODE (SET_SRC (set)) != COMPARE)
+ return false;
+
+ curr_insn = NEXT_INSN (curr_insn);
+ if (!curr_insn)
+ return false;
+
+ /* And the last instruction should be an IF_THEN_ELSE. */
+ set = single_set (curr_insn);
+ if (!set
+ || !REG_P (SET_DEST (set))
+ || GET_CODE (SET_SRC (set)) != IF_THEN_ELSE)
+ return false;
+
+ return !NEXT_INSN (curr_insn);
+}
+
+/* For Armv8.1-M Mainline we have both conditional execution through IT blocks,
+ as well as conditional arithmetic instructions controlled by
+ TARGET_COND_ARITH. To generate the latter we rely on a special part of the
+ "ce" pass that generates code for targets that don't support conditional
+ execution of general instructions known as "noce". These transformations
+ happen before 'reload_completed'. However, "noce" also triggers for some
+ unwanted patterns [PR 116444] that prevent "ce" optimisations after reload.
+ To make sure we can get both we use the TARGET_NOCE_CONVERSION_PROFITABLE_P
+ hook to only allow "noce" to generate the patterns that are profitable. */
+
+bool
+arm_noce_conversion_profitable_p (rtx_insn *seq, struct noce_if_info *)
+{
+ if (!TARGET_COND_ARITH
+ || reload_completed)
+ return true;
+
+ if (arm_is_v81m_cond_insn (seq))
+ return true;
+
+ return false;
+}
+
/* Output assembly to read the thread pointer from the appropriate TPIDR
register into DEST. If PRED_P also emit the %? that can be used to
output the predication code. */
@@ -36087,4 +36213,18 @@ arm_output_load_tpidr (rtx dst, bool pred_p)
return "";
}
+/* Return the MVE vector mode that has NUNITS elements of mode INNER_MODE. */
+opt_machine_mode
+arm_mve_data_mode (scalar_mode inner_mode, poly_uint64 nunits)
+{
+ enum mode_class mclass
+ = (SCALAR_FLOAT_MODE_P (inner_mode) ? MODE_VECTOR_FLOAT : MODE_VECTOR_INT);
+ machine_mode mode;
+ FOR_EACH_MODE_IN_CLASS (mode, mclass)
+ if (inner_mode == GET_MODE_INNER (mode)
+ && known_eq (nunits, GET_MODE_NUNITS (mode)))
+ return mode;
+ return opt_machine_mode ();
+}
+
#include "gt-arm.h"
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 0cd5d73..13a90d8 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -394,9 +394,11 @@ emission of floating point pcs attributes. */
TARGET_MODE_CHECK that also takes into account the selected CPU and
architecture. */
#define OPTION_DEFAULT_SPECS \
- {"arch", "%{!march=*:%{!mcpu=*:-march=%(VALUE)}}" }, \
- {"cpu", "%{!march=*:%{!mcpu=*:-mcpu=%(VALUE)}}" }, \
- {"tune", "%{!mcpu=*:%{!mtune=*:-mtune=%(VALUE)}}" }, \
+ {"arch", "%{!march=*|march=unset:"\
+ "%{!mcpu=*|mcpu=unset:%<march=* %<mcpu=* -march=%(VALUE)}}" }, \
+ {"tune", "%{!mcpu=*|mcpu=unset:%{!mtune=*:-mtune=%(VALUE)}}" }, \
+ {"cpu", "%{!march=*|march=unset:"\
+ "%{!mcpu=*|mcpu=unset:%<march=* %<mcpu=* -mcpu=%(VALUE)}}" }, \
{"float", "%{!mfloat-abi=*:-mfloat-abi=%(VALUE)}" }, \
{"fpu", "%{!mfpu=*:-mfpu=%(VALUE)}"}, \
{"abi", "%{!mabi=*:-mabi=%(VALUE)}"}, \
@@ -1424,7 +1426,7 @@ extern const char *fp_sysreg_names[NB_FP_SYSREGS];
but prevents the compiler from extending the lifetime of these
registers. */
#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P \
- arm_small_register_classes_for_mode_p
+ arm_small_register_classes_for_mode_p
/* Must leave BASE_REGS reloads alone */
#define THUMB_SECONDARY_INPUT_RELOAD_CLASS(CLASS, MODE, X) \
@@ -1477,7 +1479,7 @@ extern const char *fp_sysreg_names[NB_FP_SYSREGS];
/* Return the maximum number of consecutive registers
needed to represent mode MODE in a register of class CLASS.
- ARM regs are UNITS_PER_WORD bits.
+ ARM regs are UNITS_PER_WORD bits.
FIXME: Is this true for iWMMX? */
#define CLASS_MAX_NREGS(CLASS, MODE) \
(CLASS == VPR_REG) \
@@ -1645,14 +1647,14 @@ machine_function;
#define ARM_Q_BIT_READ (arm_q_bit_access ())
#define ARM_GE_BITS_READ (arm_ge_bits_access ())
-/* As in the machine_function, a global set of call-via labels, for code
+/* As in the machine_function, a global set of call-via labels, for code
that is in text_section. */
extern GTY(()) rtx thumb_call_via_label[14];
/* The number of potential ways of assigning to a co-processor. */
#define ARM_NUM_COPROC_SLOTS 1
-/* Enumeration of procedure calling standard variants. We don't really
+/* Enumeration of procedure calling standard variants. We don't really
support all of these yet. */
enum arm_pcs
{
@@ -2538,6 +2540,11 @@ const char *arm_be8_option (int argc, const char **argv);
#define TARGET_MODE_SPECS \
" %{!marm:%{!mthumb:%:target_mode_check(%{march=*:arch %*;mcpu=*:cpu %*;:})}}"
+/* Cleanup any stray -march=/-mcpu= if either is followed by "unset". */
+#define ARCH_CPU_CLEANUP_SPECS \
+ " %{march=unset:%<march=*} " \
+ " %{mcpu=unset:%<mcpu=*} "
+
/* Generate a canonical string to represent the architecture selected. */
#define ARCH_CANONICAL_SPECS \
" -march=%:canon_arch(%{mcpu=*: cpu %*} " \
@@ -2559,6 +2566,7 @@ const char *arm_be8_option (int argc, const char **argv);
individual rules so that any option suppression (%<opt...)is
completed before starting subsequent rules. */
#define DRIVER_SELF_SPECS \
+ ARCH_CPU_CLEANUP_SPECS, \
MCPU_MTUNE_NATIVE_SPECS, \
TARGET_MODE_SPECS, \
MULTILIB_ARCH_CANONICAL_SPECS, \
diff --git a/gcc/config/arm/arm_mve.h b/gcc/config/arm/arm_mve.h
index ae1b543..8ffdbc7 100644
--- a/gcc/config/arm/arm_mve.h
+++ b/gcc/config/arm/arm_mve.h
@@ -42,17 +42,9 @@
#ifndef __ARM_MVE_PRESERVE_USER_NAMESPACE
#define vst4q(__addr, __value) __arm_vst4q(__addr, __value)
-#define vornq(__a, __b) __arm_vornq(__a, __b)
-#define vbicq(__a, __b) __arm_vbicq(__a, __b)
-#define vbicq_m_n(__a, __imm, __p) __arm_vbicq_m_n(__a, __imm, __p)
-#define vshlcq(__a, __b, __imm) __arm_vshlcq(__a, __b, __imm)
-#define vbicq_m(__inactive, __a, __b, __p) __arm_vbicq_m(__inactive, __a, __b, __p)
-#define vornq_m(__inactive, __a, __b, __p) __arm_vornq_m(__inactive, __a, __b, __p)
#define vstrbq_scatter_offset(__base, __offset, __value) __arm_vstrbq_scatter_offset(__base, __offset, __value)
-#define vstrbq(__addr, __value) __arm_vstrbq(__addr, __value)
#define vstrwq_scatter_base(__addr, __offset, __value) __arm_vstrwq_scatter_base(__addr, __offset, __value)
#define vldrbq_gather_offset(__base, __offset) __arm_vldrbq_gather_offset(__base, __offset)
-#define vstrbq_p(__addr, __value, __p) __arm_vstrbq_p(__addr, __value, __p)
#define vstrbq_scatter_offset_p(__base, __offset, __value, __p) __arm_vstrbq_scatter_offset_p(__base, __offset, __value, __p)
#define vstrwq_scatter_base_p(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_p(__addr, __offset, __value, __p)
#define vldrbq_gather_offset_z(__base, __offset, __p) __arm_vldrbq_gather_offset_z(__base, __offset, __p)
@@ -72,10 +64,6 @@
#define vstrhq_scatter_offset_p(__base, __offset, __value, __p) __arm_vstrhq_scatter_offset_p(__base, __offset, __value, __p)
#define vstrhq_scatter_shifted_offset(__base, __offset, __value) __arm_vstrhq_scatter_shifted_offset(__base, __offset, __value)
#define vstrhq_scatter_shifted_offset_p(__base, __offset, __value, __p) __arm_vstrhq_scatter_shifted_offset_p(__base, __offset, __value, __p)
-#define vstrhq(__addr, __value) __arm_vstrhq(__addr, __value)
-#define vstrhq_p(__addr, __value, __p) __arm_vstrhq_p(__addr, __value, __p)
-#define vstrwq(__addr, __value) __arm_vstrwq(__addr, __value)
-#define vstrwq_p(__addr, __value, __p) __arm_vstrwq_p(__addr, __value, __p)
#define vstrdq_scatter_base_p(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_p(__addr, __offset, __value, __p)
#define vstrdq_scatter_base(__addr, __offset, __value) __arm_vstrdq_scatter_base(__addr, __offset, __value)
#define vstrdq_scatter_offset_p(__base, __offset, __value, __p) __arm_vstrdq_scatter_offset_p(__base, __offset, __value, __p)
@@ -87,70 +75,15 @@
#define vstrwq_scatter_shifted_offset_p(__base, __offset, __value, __p) __arm_vstrwq_scatter_shifted_offset_p(__base, __offset, __value, __p)
#define vstrwq_scatter_shifted_offset(__base, __offset, __value) __arm_vstrwq_scatter_shifted_offset(__base, __offset, __value)
#define vuninitializedq(__v) __arm_vuninitializedq(__v)
-#define vddupq_m(__inactive, __a, __imm, __p) __arm_vddupq_m(__inactive, __a, __imm, __p)
-#define vddupq_u8(__a, __imm) __arm_vddupq_u8(__a, __imm)
-#define vddupq_u32(__a, __imm) __arm_vddupq_u32(__a, __imm)
-#define vddupq_u16(__a, __imm) __arm_vddupq_u16(__a, __imm)
-#define vdwdupq_m(__inactive, __a, __b, __imm, __p) __arm_vdwdupq_m(__inactive, __a, __b, __imm, __p)
-#define vdwdupq_u8(__a, __b, __imm) __arm_vdwdupq_u8(__a, __b, __imm)
-#define vdwdupq_u32(__a, __b, __imm) __arm_vdwdupq_u32(__a, __b, __imm)
-#define vdwdupq_u16(__a, __b, __imm) __arm_vdwdupq_u16(__a, __b, __imm)
-#define vidupq_m(__inactive, __a, __imm, __p) __arm_vidupq_m(__inactive, __a, __imm, __p)
-#define vidupq_u8(__a, __imm) __arm_vidupq_u8(__a, __imm)
-#define vidupq_u32(__a, __imm) __arm_vidupq_u32(__a, __imm)
-#define vidupq_u16(__a, __imm) __arm_vidupq_u16(__a, __imm)
-#define viwdupq_m(__inactive, __a, __b, __imm, __p) __arm_viwdupq_m(__inactive, __a, __b, __imm, __p)
-#define viwdupq_u8(__a, __b, __imm) __arm_viwdupq_u8(__a, __b, __imm)
-#define viwdupq_u32(__a, __b, __imm) __arm_viwdupq_u32(__a, __b, __imm)
-#define viwdupq_u16(__a, __b, __imm) __arm_viwdupq_u16(__a, __b, __imm)
#define vstrdq_scatter_base_wb(__addr, __offset, __value) __arm_vstrdq_scatter_base_wb(__addr, __offset, __value)
#define vstrdq_scatter_base_wb_p(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_wb_p(__addr, __offset, __value, __p)
#define vstrwq_scatter_base_wb_p(__addr, __offset, __value, __p) __arm_vstrwq_scatter_base_wb_p(__addr, __offset, __value, __p)
#define vstrwq_scatter_base_wb(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb(__addr, __offset, __value)
-#define vddupq_x_u8(__a, __imm, __p) __arm_vddupq_x_u8(__a, __imm, __p)
-#define vddupq_x_u16(__a, __imm, __p) __arm_vddupq_x_u16(__a, __imm, __p)
-#define vddupq_x_u32(__a, __imm, __p) __arm_vddupq_x_u32(__a, __imm, __p)
-#define vdwdupq_x_u8(__a, __b, __imm, __p) __arm_vdwdupq_x_u8(__a, __b, __imm, __p)
-#define vdwdupq_x_u16(__a, __b, __imm, __p) __arm_vdwdupq_x_u16(__a, __b, __imm, __p)
-#define vdwdupq_x_u32(__a, __b, __imm, __p) __arm_vdwdupq_x_u32(__a, __b, __imm, __p)
-#define vidupq_x_u8(__a, __imm, __p) __arm_vidupq_x_u8(__a, __imm, __p)
-#define vidupq_x_u16(__a, __imm, __p) __arm_vidupq_x_u16(__a, __imm, __p)
-#define vidupq_x_u32(__a, __imm, __p) __arm_vidupq_x_u32(__a, __imm, __p)
-#define viwdupq_x_u8(__a, __b, __imm, __p) __arm_viwdupq_x_u8(__a, __b, __imm, __p)
-#define viwdupq_x_u16(__a, __b, __imm, __p) __arm_viwdupq_x_u16(__a, __b, __imm, __p)
-#define viwdupq_x_u32(__a, __b, __imm, __p) __arm_viwdupq_x_u32(__a, __b, __imm, __p)
-#define vbicq_x(__a, __b, __p) __arm_vbicq_x(__a, __b, __p)
-#define vornq_x(__a, __b, __p) __arm_vornq_x(__a, __b, __p)
-#define vadciq(__a, __b, __carry_out) __arm_vadciq(__a, __b, __carry_out)
-#define vadciq_m(__inactive, __a, __b, __carry_out, __p) __arm_vadciq_m(__inactive, __a, __b, __carry_out, __p)
-#define vadcq(__a, __b, __carry) __arm_vadcq(__a, __b, __carry)
-#define vadcq_m(__inactive, __a, __b, __carry, __p) __arm_vadcq_m(__inactive, __a, __b, __carry, __p)
-#define vsbciq(__a, __b, __carry_out) __arm_vsbciq(__a, __b, __carry_out)
-#define vsbciq_m(__inactive, __a, __b, __carry_out, __p) __arm_vsbciq_m(__inactive, __a, __b, __carry_out, __p)
-#define vsbcq(__a, __b, __carry) __arm_vsbcq(__a, __b, __carry)
-#define vsbcq_m(__inactive, __a, __b, __carry, __p) __arm_vsbcq_m(__inactive, __a, __b, __carry, __p)
-#define vst1q_p(__addr, __value, __p) __arm_vst1q_p(__addr, __value, __p)
#define vst2q(__addr, __value) __arm_vst2q(__addr, __value)
-#define vld1q_z(__base, __p) __arm_vld1q_z(__base, __p)
#define vld2q(__addr) __arm_vld2q(__addr)
#define vld4q(__addr) __arm_vld4q(__addr)
#define vsetq_lane(__a, __b, __idx) __arm_vsetq_lane(__a, __b, __idx)
#define vgetq_lane(__a, __idx) __arm_vgetq_lane(__a, __idx)
-#define vshlcq_m(__a, __b, __imm, __p) __arm_vshlcq_m(__a, __b, __imm, __p)
-#define vcvttq_f32(__a) __arm_vcvttq_f32(__a)
-#define vcvtbq_f32(__a) __arm_vcvtbq_f32(__a)
-#define vcvtq(__a) __arm_vcvtq(__a)
-#define vcvtq_n(__a, __imm6) __arm_vcvtq_n(__a, __imm6)
-#define vcvtaq_m(__inactive, __a, __p) __arm_vcvtaq_m(__inactive, __a, __p)
-#define vcvtq_m(__inactive, __a, __p) __arm_vcvtq_m(__inactive, __a, __p)
-#define vcvtbq_m(__a, __b, __p) __arm_vcvtbq_m(__a, __b, __p)
-#define vcvttq_m(__a, __b, __p) __arm_vcvttq_m(__a, __b, __p)
-#define vcvtmq_m(__inactive, __a, __p) __arm_vcvtmq_m(__inactive, __a, __p)
-#define vcvtnq_m(__inactive, __a, __p) __arm_vcvtnq_m(__inactive, __a, __p)
-#define vcvtpq_m(__inactive, __a, __p) __arm_vcvtpq_m(__inactive, __a, __p)
-#define vcvtq_m_n(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n(__inactive, __a, __imm6, __p)
-#define vcvtq_x(__a, __p) __arm_vcvtq_x(__a, __p)
-#define vcvtq_x_n(__a, __imm6, __p) __arm_vcvtq_x_n(__a, __imm6, __p)
#define vst4q_s8( __addr, __value) __arm_vst4q_s8( __addr, __value)
@@ -161,167 +94,23 @@
#define vst4q_u32( __addr, __value) __arm_vst4q_u32( __addr, __value)
#define vst4q_f16( __addr, __value) __arm_vst4q_f16( __addr, __value)
#define vst4q_f32( __addr, __value) __arm_vst4q_f32( __addr, __value)
-#define vcvttq_f32_f16(__a) __arm_vcvttq_f32_f16(__a)
-#define vcvtbq_f32_f16(__a) __arm_vcvtbq_f32_f16(__a)
-#define vcvtq_f16_s16(__a) __arm_vcvtq_f16_s16(__a)
-#define vcvtq_f32_s32(__a) __arm_vcvtq_f32_s32(__a)
-#define vcvtq_f16_u16(__a) __arm_vcvtq_f16_u16(__a)
-#define vcvtq_f32_u32(__a) __arm_vcvtq_f32_u32(__a)
-#define vcvtaq_s16_f16(__a) __arm_vcvtaq_s16_f16(__a)
-#define vcvtaq_s32_f32(__a) __arm_vcvtaq_s32_f32(__a)
-#define vcvtnq_s16_f16(__a) __arm_vcvtnq_s16_f16(__a)
-#define vcvtnq_s32_f32(__a) __arm_vcvtnq_s32_f32(__a)
-#define vcvtpq_s16_f16(__a) __arm_vcvtpq_s16_f16(__a)
-#define vcvtpq_s32_f32(__a) __arm_vcvtpq_s32_f32(__a)
-#define vcvtmq_s16_f16(__a) __arm_vcvtmq_s16_f16(__a)
-#define vcvtmq_s32_f32(__a) __arm_vcvtmq_s32_f32(__a)
-#define vcvtq_s16_f16(__a) __arm_vcvtq_s16_f16(__a)
-#define vcvtq_s32_f32(__a) __arm_vcvtq_s32_f32(__a)
-#define vcvtq_u16_f16(__a) __arm_vcvtq_u16_f16(__a)
-#define vcvtq_u32_f32(__a) __arm_vcvtq_u32_f32(__a)
-#define vcvtpq_u16_f16(__a) __arm_vcvtpq_u16_f16(__a)
-#define vcvtpq_u32_f32(__a) __arm_vcvtpq_u32_f32(__a)
-#define vcvtnq_u16_f16(__a) __arm_vcvtnq_u16_f16(__a)
-#define vcvtnq_u32_f32(__a) __arm_vcvtnq_u32_f32(__a)
-#define vcvtmq_u16_f16(__a) __arm_vcvtmq_u16_f16(__a)
-#define vcvtmq_u32_f32(__a) __arm_vcvtmq_u32_f32(__a)
-#define vcvtaq_u16_f16(__a) __arm_vcvtaq_u16_f16(__a)
-#define vcvtaq_u32_f32(__a) __arm_vcvtaq_u32_f32(__a)
-#define vctp16q(__a) __arm_vctp16q(__a)
-#define vctp32q(__a) __arm_vctp32q(__a)
-#define vctp64q(__a) __arm_vctp64q(__a)
-#define vctp8q(__a) __arm_vctp8q(__a)
#define vpnot(__a) __arm_vpnot(__a)
-#define vcvtq_n_f16_s16(__a, __imm6) __arm_vcvtq_n_f16_s16(__a, __imm6)
-#define vcvtq_n_f32_s32(__a, __imm6) __arm_vcvtq_n_f32_s32(__a, __imm6)
-#define vcvtq_n_f16_u16(__a, __imm6) __arm_vcvtq_n_f16_u16(__a, __imm6)
-#define vcvtq_n_f32_u32(__a, __imm6) __arm_vcvtq_n_f32_u32(__a, __imm6)
-#define vcvtq_n_s16_f16(__a, __imm6) __arm_vcvtq_n_s16_f16(__a, __imm6)
-#define vcvtq_n_s32_f32(__a, __imm6) __arm_vcvtq_n_s32_f32(__a, __imm6)
-#define vcvtq_n_u16_f16(__a, __imm6) __arm_vcvtq_n_u16_f16(__a, __imm6)
-#define vcvtq_n_u32_f32(__a, __imm6) __arm_vcvtq_n_u32_f32(__a, __imm6)
-#define vornq_u8(__a, __b) __arm_vornq_u8(__a, __b)
-#define vbicq_u8(__a, __b) __arm_vbicq_u8(__a, __b)
-#define vornq_s8(__a, __b) __arm_vornq_s8(__a, __b)
-#define vbicq_s8(__a, __b) __arm_vbicq_s8(__a, __b)
-#define vornq_u16(__a, __b) __arm_vornq_u16(__a, __b)
-#define vbicq_u16(__a, __b) __arm_vbicq_u16(__a, __b)
-#define vornq_s16(__a, __b) __arm_vornq_s16(__a, __b)
-#define vbicq_s16(__a, __b) __arm_vbicq_s16(__a, __b)
-#define vornq_u32(__a, __b) __arm_vornq_u32(__a, __b)
-#define vbicq_u32(__a, __b) __arm_vbicq_u32(__a, __b)
-#define vornq_s32(__a, __b) __arm_vornq_s32(__a, __b)
-#define vbicq_s32(__a, __b) __arm_vbicq_s32(__a, __b)
-#define vbicq_n_u16(__a, __imm) __arm_vbicq_n_u16(__a, __imm)
-#define vornq_f16(__a, __b) __arm_vornq_f16(__a, __b)
-#define vbicq_f16(__a, __b) __arm_vbicq_f16(__a, __b)
-#define vbicq_n_s16(__a, __imm) __arm_vbicq_n_s16(__a, __imm)
-#define vbicq_n_u32(__a, __imm) __arm_vbicq_n_u32(__a, __imm)
-#define vornq_f32(__a, __b) __arm_vornq_f32(__a, __b)
-#define vbicq_f32(__a, __b) __arm_vbicq_f32(__a, __b)
-#define vbicq_n_s32(__a, __imm) __arm_vbicq_n_s32(__a, __imm)
-#define vctp8q_m(__a, __p) __arm_vctp8q_m(__a, __p)
-#define vctp64q_m(__a, __p) __arm_vctp64q_m(__a, __p)
-#define vctp32q_m(__a, __p) __arm_vctp32q_m(__a, __p)
-#define vctp16q_m(__a, __p) __arm_vctp16q_m(__a, __p)
-#define vcvttq_f16_f32(__a, __b) __arm_vcvttq_f16_f32(__a, __b)
-#define vcvtbq_f16_f32(__a, __b) __arm_vcvtbq_f16_f32(__a, __b)
-#define vbicq_m_n_s16(__a, __imm, __p) __arm_vbicq_m_n_s16(__a, __imm, __p)
-#define vbicq_m_n_s32(__a, __imm, __p) __arm_vbicq_m_n_s32(__a, __imm, __p)
-#define vbicq_m_n_u16(__a, __imm, __p) __arm_vbicq_m_n_u16(__a, __imm, __p)
-#define vbicq_m_n_u32(__a, __imm, __p) __arm_vbicq_m_n_u32(__a, __imm, __p)
-#define vcvtaq_m_s16_f16(__inactive, __a, __p) __arm_vcvtaq_m_s16_f16(__inactive, __a, __p)
-#define vcvtaq_m_u16_f16(__inactive, __a, __p) __arm_vcvtaq_m_u16_f16(__inactive, __a, __p)
-#define vcvtaq_m_s32_f32(__inactive, __a, __p) __arm_vcvtaq_m_s32_f32(__inactive, __a, __p)
-#define vcvtaq_m_u32_f32(__inactive, __a, __p) __arm_vcvtaq_m_u32_f32(__inactive, __a, __p)
-#define vcvtq_m_f16_s16(__inactive, __a, __p) __arm_vcvtq_m_f16_s16(__inactive, __a, __p)
-#define vcvtq_m_f16_u16(__inactive, __a, __p) __arm_vcvtq_m_f16_u16(__inactive, __a, __p)
-#define vcvtq_m_f32_s32(__inactive, __a, __p) __arm_vcvtq_m_f32_s32(__inactive, __a, __p)
-#define vcvtq_m_f32_u32(__inactive, __a, __p) __arm_vcvtq_m_f32_u32(__inactive, __a, __p)
-#define vshlcq_s8(__a, __b, __imm) __arm_vshlcq_s8(__a, __b, __imm)
-#define vshlcq_u8(__a, __b, __imm) __arm_vshlcq_u8(__a, __b, __imm)
-#define vshlcq_s16(__a, __b, __imm) __arm_vshlcq_s16(__a, __b, __imm)
-#define vshlcq_u16(__a, __b, __imm) __arm_vshlcq_u16(__a, __b, __imm)
-#define vshlcq_s32(__a, __b, __imm) __arm_vshlcq_s32(__a, __b, __imm)
-#define vshlcq_u32(__a, __b, __imm) __arm_vshlcq_u32(__a, __b, __imm)
-#define vcvtbq_m_f16_f32(__a, __b, __p) __arm_vcvtbq_m_f16_f32(__a, __b, __p)
-#define vcvtbq_m_f32_f16(__inactive, __a, __p) __arm_vcvtbq_m_f32_f16(__inactive, __a, __p)
-#define vcvttq_m_f16_f32(__a, __b, __p) __arm_vcvttq_m_f16_f32(__a, __b, __p)
-#define vcvttq_m_f32_f16(__inactive, __a, __p) __arm_vcvttq_m_f32_f16(__inactive, __a, __p)
-#define vcvtmq_m_s16_f16(__inactive, __a, __p) __arm_vcvtmq_m_s16_f16(__inactive, __a, __p)
-#define vcvtnq_m_s16_f16(__inactive, __a, __p) __arm_vcvtnq_m_s16_f16(__inactive, __a, __p)
-#define vcvtpq_m_s16_f16(__inactive, __a, __p) __arm_vcvtpq_m_s16_f16(__inactive, __a, __p)
-#define vcvtq_m_s16_f16(__inactive, __a, __p) __arm_vcvtq_m_s16_f16(__inactive, __a, __p)
-#define vcvtmq_m_u16_f16(__inactive, __a, __p) __arm_vcvtmq_m_u16_f16(__inactive, __a, __p)
-#define vcvtnq_m_u16_f16(__inactive, __a, __p) __arm_vcvtnq_m_u16_f16(__inactive, __a, __p)
-#define vcvtpq_m_u16_f16(__inactive, __a, __p) __arm_vcvtpq_m_u16_f16(__inactive, __a, __p)
-#define vcvtq_m_u16_f16(__inactive, __a, __p) __arm_vcvtq_m_u16_f16(__inactive, __a, __p)
-#define vcvtmq_m_s32_f32(__inactive, __a, __p) __arm_vcvtmq_m_s32_f32(__inactive, __a, __p)
-#define vcvtnq_m_s32_f32(__inactive, __a, __p) __arm_vcvtnq_m_s32_f32(__inactive, __a, __p)
-#define vcvtpq_m_s32_f32(__inactive, __a, __p) __arm_vcvtpq_m_s32_f32(__inactive, __a, __p)
-#define vcvtq_m_s32_f32(__inactive, __a, __p) __arm_vcvtq_m_s32_f32(__inactive, __a, __p)
-#define vcvtmq_m_u32_f32(__inactive, __a, __p) __arm_vcvtmq_m_u32_f32(__inactive, __a, __p)
-#define vcvtnq_m_u32_f32(__inactive, __a, __p) __arm_vcvtnq_m_u32_f32(__inactive, __a, __p)
-#define vcvtpq_m_u32_f32(__inactive, __a, __p) __arm_vcvtpq_m_u32_f32(__inactive, __a, __p)
-#define vcvtq_m_u32_f32(__inactive, __a, __p) __arm_vcvtq_m_u32_f32(__inactive, __a, __p)
-#define vcvtq_m_n_f16_u16(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_f16_u16(__inactive, __a, __imm6, __p)
-#define vcvtq_m_n_f16_s16(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_f16_s16(__inactive, __a, __imm6, __p)
-#define vcvtq_m_n_f32_u32(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_f32_u32(__inactive, __a, __imm6, __p)
-#define vcvtq_m_n_f32_s32(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_f32_s32(__inactive, __a, __imm6, __p)
-#define vbicq_m_s8(__inactive, __a, __b, __p) __arm_vbicq_m_s8(__inactive, __a, __b, __p)
-#define vbicq_m_s32(__inactive, __a, __b, __p) __arm_vbicq_m_s32(__inactive, __a, __b, __p)
-#define vbicq_m_s16(__inactive, __a, __b, __p) __arm_vbicq_m_s16(__inactive, __a, __b, __p)
-#define vbicq_m_u8(__inactive, __a, __b, __p) __arm_vbicq_m_u8(__inactive, __a, __b, __p)
-#define vbicq_m_u32(__inactive, __a, __b, __p) __arm_vbicq_m_u32(__inactive, __a, __b, __p)
-#define vbicq_m_u16(__inactive, __a, __b, __p) __arm_vbicq_m_u16(__inactive, __a, __b, __p)
-#define vornq_m_s8(__inactive, __a, __b, __p) __arm_vornq_m_s8(__inactive, __a, __b, __p)
-#define vornq_m_s32(__inactive, __a, __b, __p) __arm_vornq_m_s32(__inactive, __a, __b, __p)
-#define vornq_m_s16(__inactive, __a, __b, __p) __arm_vornq_m_s16(__inactive, __a, __b, __p)
-#define vornq_m_u8(__inactive, __a, __b, __p) __arm_vornq_m_u8(__inactive, __a, __b, __p)
-#define vornq_m_u32(__inactive, __a, __b, __p) __arm_vornq_m_u32(__inactive, __a, __b, __p)
-#define vornq_m_u16(__inactive, __a, __b, __p) __arm_vornq_m_u16(__inactive, __a, __b, __p)
-#define vbicq_m_f32(__inactive, __a, __b, __p) __arm_vbicq_m_f32(__inactive, __a, __b, __p)
-#define vbicq_m_f16(__inactive, __a, __b, __p) __arm_vbicq_m_f16(__inactive, __a, __b, __p)
-#define vcvtq_m_n_s32_f32(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_s32_f32(__inactive, __a, __imm6, __p)
-#define vcvtq_m_n_s16_f16(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_s16_f16(__inactive, __a, __imm6, __p)
-#define vcvtq_m_n_u32_f32(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_u32_f32(__inactive, __a, __imm6, __p)
-#define vcvtq_m_n_u16_f16(__inactive, __a, __imm6, __p) __arm_vcvtq_m_n_u16_f16(__inactive, __a, __imm6, __p)
-#define vornq_m_f32(__inactive, __a, __b, __p) __arm_vornq_m_f32(__inactive, __a, __b, __p)
-#define vornq_m_f16(__inactive, __a, __b, __p) __arm_vornq_m_f16(__inactive, __a, __b, __p)
-#define vstrbq_s8( __addr, __value) __arm_vstrbq_s8( __addr, __value)
-#define vstrbq_u8( __addr, __value) __arm_vstrbq_u8( __addr, __value)
-#define vstrbq_u16( __addr, __value) __arm_vstrbq_u16( __addr, __value)
#define vstrbq_scatter_offset_s8( __base, __offset, __value) __arm_vstrbq_scatter_offset_s8( __base, __offset, __value)
#define vstrbq_scatter_offset_u8( __base, __offset, __value) __arm_vstrbq_scatter_offset_u8( __base, __offset, __value)
#define vstrbq_scatter_offset_u16( __base, __offset, __value) __arm_vstrbq_scatter_offset_u16( __base, __offset, __value)
-#define vstrbq_s16( __addr, __value) __arm_vstrbq_s16( __addr, __value)
-#define vstrbq_u32( __addr, __value) __arm_vstrbq_u32( __addr, __value)
#define vstrbq_scatter_offset_s16( __base, __offset, __value) __arm_vstrbq_scatter_offset_s16( __base, __offset, __value)
#define vstrbq_scatter_offset_u32( __base, __offset, __value) __arm_vstrbq_scatter_offset_u32( __base, __offset, __value)
-#define vstrbq_s32( __addr, __value) __arm_vstrbq_s32( __addr, __value)
#define vstrbq_scatter_offset_s32( __base, __offset, __value) __arm_vstrbq_scatter_offset_s32( __base, __offset, __value)
#define vstrwq_scatter_base_s32(__addr, __offset, __value) __arm_vstrwq_scatter_base_s32(__addr, __offset, __value)
#define vstrwq_scatter_base_u32(__addr, __offset, __value) __arm_vstrwq_scatter_base_u32(__addr, __offset, __value)
#define vldrbq_gather_offset_u8(__base, __offset) __arm_vldrbq_gather_offset_u8(__base, __offset)
#define vldrbq_gather_offset_s8(__base, __offset) __arm_vldrbq_gather_offset_s8(__base, __offset)
-#define vldrbq_s8(__base) __arm_vldrbq_s8(__base)
-#define vldrbq_u8(__base) __arm_vldrbq_u8(__base)
#define vldrbq_gather_offset_u16(__base, __offset) __arm_vldrbq_gather_offset_u16(__base, __offset)
#define vldrbq_gather_offset_s16(__base, __offset) __arm_vldrbq_gather_offset_s16(__base, __offset)
-#define vldrbq_s16(__base) __arm_vldrbq_s16(__base)
-#define vldrbq_u16(__base) __arm_vldrbq_u16(__base)
#define vldrbq_gather_offset_u32(__base, __offset) __arm_vldrbq_gather_offset_u32(__base, __offset)
#define vldrbq_gather_offset_s32(__base, __offset) __arm_vldrbq_gather_offset_s32(__base, __offset)
-#define vldrbq_s32(__base) __arm_vldrbq_s32(__base)
-#define vldrbq_u32(__base) __arm_vldrbq_u32(__base)
#define vldrwq_gather_base_s32(__addr, __offset) __arm_vldrwq_gather_base_s32(__addr, __offset)
#define vldrwq_gather_base_u32(__addr, __offset) __arm_vldrwq_gather_base_u32(__addr, __offset)
-#define vstrbq_p_s8( __addr, __value, __p) __arm_vstrbq_p_s8( __addr, __value, __p)
-#define vstrbq_p_s32( __addr, __value, __p) __arm_vstrbq_p_s32( __addr, __value, __p)
-#define vstrbq_p_s16( __addr, __value, __p) __arm_vstrbq_p_s16( __addr, __value, __p)
-#define vstrbq_p_u8( __addr, __value, __p) __arm_vstrbq_p_u8( __addr, __value, __p)
-#define vstrbq_p_u32( __addr, __value, __p) __arm_vstrbq_p_u32( __addr, __value, __p)
-#define vstrbq_p_u16( __addr, __value, __p) __arm_vstrbq_p_u16( __addr, __value, __p)
#define vstrbq_scatter_offset_p_s8( __base, __offset, __value, __p) __arm_vstrbq_scatter_offset_p_s8( __base, __offset, __value, __p)
#define vstrbq_scatter_offset_p_s32( __base, __offset, __value, __p) __arm_vstrbq_scatter_offset_p_s32( __base, __offset, __value, __p)
#define vstrbq_scatter_offset_p_s16( __base, __offset, __value, __p) __arm_vstrbq_scatter_offset_p_s16( __base, __offset, __value, __p)
@@ -336,12 +125,6 @@
#define vldrbq_gather_offset_z_u16(__base, __offset, __p) __arm_vldrbq_gather_offset_z_u16(__base, __offset, __p)
#define vldrbq_gather_offset_z_u32(__base, __offset, __p) __arm_vldrbq_gather_offset_z_u32(__base, __offset, __p)
#define vldrbq_gather_offset_z_s8(__base, __offset, __p) __arm_vldrbq_gather_offset_z_s8(__base, __offset, __p)
-#define vldrbq_z_s16(__base, __p) __arm_vldrbq_z_s16(__base, __p)
-#define vldrbq_z_u8(__base, __p) __arm_vldrbq_z_u8(__base, __p)
-#define vldrbq_z_s8(__base, __p) __arm_vldrbq_z_s8(__base, __p)
-#define vldrbq_z_s32(__base, __p) __arm_vldrbq_z_s32(__base, __p)
-#define vldrbq_z_u16(__base, __p) __arm_vldrbq_z_u16(__base, __p)
-#define vldrbq_z_u32(__base, __p) __arm_vldrbq_z_u32(__base, __p)
#define vldrwq_gather_base_z_u32(__addr, __offset, __p) __arm_vldrwq_gather_base_z_u32(__addr, __offset, __p)
#define vldrwq_gather_base_z_s32(__addr, __offset, __p) __arm_vldrwq_gather_base_z_s32(__addr, __offset, __p)
#define vldrhq_gather_offset_s32(__base, __offset) __arm_vldrhq_gather_offset_s32(__base, __offset)
@@ -360,22 +143,6 @@
#define vldrhq_gather_shifted_offset_z_s16(__base, __offset, __p) __arm_vldrhq_gather_shifted_offset_z_s16(__base, __offset, __p)
#define vldrhq_gather_shifted_offset_z_u32(__base, __offset, __p) __arm_vldrhq_gather_shifted_offset_z_u32(__base, __offset, __p)
#define vldrhq_gather_shifted_offset_z_u16(__base, __offset, __p) __arm_vldrhq_gather_shifted_offset_z_u16(__base, __offset, __p)
-#define vldrhq_s32(__base) __arm_vldrhq_s32(__base)
-#define vldrhq_s16(__base) __arm_vldrhq_s16(__base)
-#define vldrhq_u32(__base) __arm_vldrhq_u32(__base)
-#define vldrhq_u16(__base) __arm_vldrhq_u16(__base)
-#define vldrhq_z_s32(__base, __p) __arm_vldrhq_z_s32(__base, __p)
-#define vldrhq_z_s16(__base, __p) __arm_vldrhq_z_s16(__base, __p)
-#define vldrhq_z_u32(__base, __p) __arm_vldrhq_z_u32(__base, __p)
-#define vldrhq_z_u16(__base, __p) __arm_vldrhq_z_u16(__base, __p)
-#define vldrwq_s32(__base) __arm_vldrwq_s32(__base)
-#define vldrwq_u32(__base) __arm_vldrwq_u32(__base)
-#define vldrwq_z_s32(__base, __p) __arm_vldrwq_z_s32(__base, __p)
-#define vldrwq_z_u32(__base, __p) __arm_vldrwq_z_u32(__base, __p)
-#define vldrhq_f16(__base) __arm_vldrhq_f16(__base)
-#define vldrhq_z_f16(__base, __p) __arm_vldrhq_z_f16(__base, __p)
-#define vldrwq_f32(__base) __arm_vldrwq_f32(__base)
-#define vldrwq_z_f32(__base, __p) __arm_vldrwq_z_f32(__base, __p)
#define vldrdq_gather_base_s64(__addr, __offset) __arm_vldrdq_gather_base_s64(__addr, __offset)
#define vldrdq_gather_base_u64(__addr, __offset) __arm_vldrdq_gather_base_u64(__addr, __offset)
#define vldrdq_gather_base_z_s64(__addr, __offset, __p) __arm_vldrdq_gather_base_z_s64(__addr, __offset, __p)
@@ -406,7 +173,6 @@
#define vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_f32(__base, __offset, __p)
#define vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_s32(__base, __offset, __p)
#define vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p) __arm_vldrwq_gather_shifted_offset_z_u32(__base, __offset, __p)
-#define vstrhq_f16(__addr, __value) __arm_vstrhq_f16(__addr, __value)
#define vstrhq_scatter_offset_s32( __base, __offset, __value) __arm_vstrhq_scatter_offset_s32( __base, __offset, __value)
#define vstrhq_scatter_offset_s16( __base, __offset, __value) __arm_vstrhq_scatter_offset_s16( __base, __offset, __value)
#define vstrhq_scatter_offset_u32( __base, __offset, __value) __arm_vstrhq_scatter_offset_u32( __base, __offset, __value)
@@ -423,21 +189,6 @@
#define vstrhq_scatter_shifted_offset_p_s16( __base, __offset, __value, __p) __arm_vstrhq_scatter_shifted_offset_p_s16( __base, __offset, __value, __p)
#define vstrhq_scatter_shifted_offset_p_u32( __base, __offset, __value, __p) __arm_vstrhq_scatter_shifted_offset_p_u32( __base, __offset, __value, __p)
#define vstrhq_scatter_shifted_offset_p_u16( __base, __offset, __value, __p) __arm_vstrhq_scatter_shifted_offset_p_u16( __base, __offset, __value, __p)
-#define vstrhq_s32(__addr, __value) __arm_vstrhq_s32(__addr, __value)
-#define vstrhq_s16(__addr, __value) __arm_vstrhq_s16(__addr, __value)
-#define vstrhq_u32(__addr, __value) __arm_vstrhq_u32(__addr, __value)
-#define vstrhq_u16(__addr, __value) __arm_vstrhq_u16(__addr, __value)
-#define vstrhq_p_f16(__addr, __value, __p) __arm_vstrhq_p_f16(__addr, __value, __p)
-#define vstrhq_p_s32(__addr, __value, __p) __arm_vstrhq_p_s32(__addr, __value, __p)
-#define vstrhq_p_s16(__addr, __value, __p) __arm_vstrhq_p_s16(__addr, __value, __p)
-#define vstrhq_p_u32(__addr, __value, __p) __arm_vstrhq_p_u32(__addr, __value, __p)
-#define vstrhq_p_u16(__addr, __value, __p) __arm_vstrhq_p_u16(__addr, __value, __p)
-#define vstrwq_f32(__addr, __value) __arm_vstrwq_f32(__addr, __value)
-#define vstrwq_s32(__addr, __value) __arm_vstrwq_s32(__addr, __value)
-#define vstrwq_u32(__addr, __value) __arm_vstrwq_u32(__addr, __value)
-#define vstrwq_p_f32(__addr, __value, __p) __arm_vstrwq_p_f32(__addr, __value, __p)
-#define vstrwq_p_s32(__addr, __value, __p) __arm_vstrwq_p_s32(__addr, __value, __p)
-#define vstrwq_p_u32(__addr, __value, __p) __arm_vstrwq_p_u32(__addr, __value, __p)
#define vstrdq_scatter_base_p_s64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_p_s64(__addr, __offset, __value, __p)
#define vstrdq_scatter_base_p_u64(__addr, __offset, __value, __p) __arm_vstrdq_scatter_base_p_u64(__addr, __offset, __value, __p)
#define vstrdq_scatter_base_s64(__addr, __offset, __value) __arm_vstrdq_scatter_base_s64(__addr, __offset, __value)
@@ -478,54 +229,6 @@
#define vuninitializedq_s64(void) __arm_vuninitializedq_s64(void)
#define vuninitializedq_f16(void) __arm_vuninitializedq_f16(void)
#define vuninitializedq_f32(void) __arm_vuninitializedq_f32(void)
-#define vddupq_m_n_u8(__inactive, __a, __imm, __p) __arm_vddupq_m_n_u8(__inactive, __a, __imm, __p)
-#define vddupq_m_n_u32(__inactive, __a, __imm, __p) __arm_vddupq_m_n_u32(__inactive, __a, __imm, __p)
-#define vddupq_m_n_u16(__inactive, __a, __imm, __p) __arm_vddupq_m_n_u16(__inactive, __a, __imm, __p)
-#define vddupq_m_wb_u8(__inactive, __a, __imm, __p) __arm_vddupq_m_wb_u8(__inactive, __a, __imm, __p)
-#define vddupq_m_wb_u16(__inactive, __a, __imm, __p) __arm_vddupq_m_wb_u16(__inactive, __a, __imm, __p)
-#define vddupq_m_wb_u32(__inactive, __a, __imm, __p) __arm_vddupq_m_wb_u32(__inactive, __a, __imm, __p)
-#define vddupq_n_u8(__a, __imm) __arm_vddupq_n_u8(__a, __imm)
-#define vddupq_n_u32(__a, __imm) __arm_vddupq_n_u32(__a, __imm)
-#define vddupq_n_u16(__a, __imm) __arm_vddupq_n_u16(__a, __imm)
-#define vddupq_wb_u8( __a, __imm) __arm_vddupq_wb_u8( __a, __imm)
-#define vddupq_wb_u16( __a, __imm) __arm_vddupq_wb_u16( __a, __imm)
-#define vddupq_wb_u32( __a, __imm) __arm_vddupq_wb_u32( __a, __imm)
-#define vdwdupq_m_n_u8(__inactive, __a, __b, __imm, __p) __arm_vdwdupq_m_n_u8(__inactive, __a, __b, __imm, __p)
-#define vdwdupq_m_n_u32(__inactive, __a, __b, __imm, __p) __arm_vdwdupq_m_n_u32(__inactive, __a, __b, __imm, __p)
-#define vdwdupq_m_n_u16(__inactive, __a, __b, __imm, __p) __arm_vdwdupq_m_n_u16(__inactive, __a, __b, __imm, __p)
-#define vdwdupq_m_wb_u8(__inactive, __a, __b, __imm, __p) __arm_vdwdupq_m_wb_u8(__inactive, __a, __b, __imm, __p)
-#define vdwdupq_m_wb_u32(__inactive, __a, __b, __imm, __p) __arm_vdwdupq_m_wb_u32(__inactive, __a, __b, __imm, __p)
-#define vdwdupq_m_wb_u16(__inactive, __a, __b, __imm, __p) __arm_vdwdupq_m_wb_u16(__inactive, __a, __b, __imm, __p)
-#define vdwdupq_n_u8(__a, __b, __imm) __arm_vdwdupq_n_u8(__a, __b, __imm)
-#define vdwdupq_n_u32(__a, __b, __imm) __arm_vdwdupq_n_u32(__a, __b, __imm)
-#define vdwdupq_n_u16(__a, __b, __imm) __arm_vdwdupq_n_u16(__a, __b, __imm)
-#define vdwdupq_wb_u8( __a, __b, __imm) __arm_vdwdupq_wb_u8( __a, __b, __imm)
-#define vdwdupq_wb_u32( __a, __b, __imm) __arm_vdwdupq_wb_u32( __a, __b, __imm)
-#define vdwdupq_wb_u16( __a, __b, __imm) __arm_vdwdupq_wb_u16( __a, __b, __imm)
-#define vidupq_m_n_u8(__inactive, __a, __imm, __p) __arm_vidupq_m_n_u8(__inactive, __a, __imm, __p)
-#define vidupq_m_n_u32(__inactive, __a, __imm, __p) __arm_vidupq_m_n_u32(__inactive, __a, __imm, __p)
-#define vidupq_m_n_u16(__inactive, __a, __imm, __p) __arm_vidupq_m_n_u16(__inactive, __a, __imm, __p)
-#define vidupq_m_wb_u8(__inactive, __a, __imm, __p) __arm_vidupq_m_wb_u8(__inactive, __a, __imm, __p)
-#define vidupq_m_wb_u16(__inactive, __a, __imm, __p) __arm_vidupq_m_wb_u16(__inactive, __a, __imm, __p)
-#define vidupq_m_wb_u32(__inactive, __a, __imm, __p) __arm_vidupq_m_wb_u32(__inactive, __a, __imm, __p)
-#define vidupq_n_u8(__a, __imm) __arm_vidupq_n_u8(__a, __imm)
-#define vidupq_n_u32(__a, __imm) __arm_vidupq_n_u32(__a, __imm)
-#define vidupq_n_u16(__a, __imm) __arm_vidupq_n_u16(__a, __imm)
-#define vidupq_wb_u8( __a, __imm) __arm_vidupq_wb_u8( __a, __imm)
-#define vidupq_wb_u16( __a, __imm) __arm_vidupq_wb_u16( __a, __imm)
-#define vidupq_wb_u32( __a, __imm) __arm_vidupq_wb_u32( __a, __imm)
-#define viwdupq_m_n_u8(__inactive, __a, __b, __imm, __p) __arm_viwdupq_m_n_u8(__inactive, __a, __b, __imm, __p)
-#define viwdupq_m_n_u32(__inactive, __a, __b, __imm, __p) __arm_viwdupq_m_n_u32(__inactive, __a, __b, __imm, __p)
-#define viwdupq_m_n_u16(__inactive, __a, __b, __imm, __p) __arm_viwdupq_m_n_u16(__inactive, __a, __b, __imm, __p)
-#define viwdupq_m_wb_u8(__inactive, __a, __b, __imm, __p) __arm_viwdupq_m_wb_u8(__inactive, __a, __b, __imm, __p)
-#define viwdupq_m_wb_u32(__inactive, __a, __b, __imm, __p) __arm_viwdupq_m_wb_u32(__inactive, __a, __b, __imm, __p)
-#define viwdupq_m_wb_u16(__inactive, __a, __b, __imm, __p) __arm_viwdupq_m_wb_u16(__inactive, __a, __b, __imm, __p)
-#define viwdupq_n_u8(__a, __b, __imm) __arm_viwdupq_n_u8(__a, __b, __imm)
-#define viwdupq_n_u32(__a, __b, __imm) __arm_viwdupq_n_u32(__a, __b, __imm)
-#define viwdupq_n_u16(__a, __b, __imm) __arm_viwdupq_n_u16(__a, __b, __imm)
-#define viwdupq_wb_u8( __a, __b, __imm) __arm_viwdupq_wb_u8( __a, __b, __imm)
-#define viwdupq_wb_u32( __a, __b, __imm) __arm_viwdupq_wb_u32( __a, __b, __imm)
-#define viwdupq_wb_u16( __a, __b, __imm) __arm_viwdupq_wb_u16( __a, __b, __imm)
#define vldrdq_gather_base_wb_s64(__addr, __offset) __arm_vldrdq_gather_base_wb_s64(__addr, __offset)
#define vldrdq_gather_base_wb_u64(__addr, __offset) __arm_vldrdq_gather_base_wb_u64(__addr, __offset)
#define vldrdq_gather_base_wb_z_s64(__addr, __offset, __p) __arm_vldrdq_gather_base_wb_z_s64(__addr, __offset, __p)
@@ -546,136 +249,30 @@
#define vstrwq_scatter_base_wb_s32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_s32(__addr, __offset, __value)
#define vstrwq_scatter_base_wb_u32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_u32(__addr, __offset, __value)
#define vstrwq_scatter_base_wb_f32(__addr, __offset, __value) __arm_vstrwq_scatter_base_wb_f32(__addr, __offset, __value)
-#define vddupq_x_n_u8(__a, __imm, __p) __arm_vddupq_x_n_u8(__a, __imm, __p)
-#define vddupq_x_n_u16(__a, __imm, __p) __arm_vddupq_x_n_u16(__a, __imm, __p)
-#define vddupq_x_n_u32(__a, __imm, __p) __arm_vddupq_x_n_u32(__a, __imm, __p)
-#define vddupq_x_wb_u8(__a, __imm, __p) __arm_vddupq_x_wb_u8(__a, __imm, __p)
-#define vddupq_x_wb_u16(__a, __imm, __p) __arm_vddupq_x_wb_u16(__a, __imm, __p)
-#define vddupq_x_wb_u32(__a, __imm, __p) __arm_vddupq_x_wb_u32(__a, __imm, __p)
-#define vdwdupq_x_n_u8(__a, __b, __imm, __p) __arm_vdwdupq_x_n_u8(__a, __b, __imm, __p)
-#define vdwdupq_x_n_u16(__a, __b, __imm, __p) __arm_vdwdupq_x_n_u16(__a, __b, __imm, __p)
-#define vdwdupq_x_n_u32(__a, __b, __imm, __p) __arm_vdwdupq_x_n_u32(__a, __b, __imm, __p)
-#define vdwdupq_x_wb_u8(__a, __b, __imm, __p) __arm_vdwdupq_x_wb_u8(__a, __b, __imm, __p)
-#define vdwdupq_x_wb_u16(__a, __b, __imm, __p) __arm_vdwdupq_x_wb_u16(__a, __b, __imm, __p)
-#define vdwdupq_x_wb_u32(__a, __b, __imm, __p) __arm_vdwdupq_x_wb_u32(__a, __b, __imm, __p)
-#define vidupq_x_n_u8(__a, __imm, __p) __arm_vidupq_x_n_u8(__a, __imm, __p)
-#define vidupq_x_n_u16(__a, __imm, __p) __arm_vidupq_x_n_u16(__a, __imm, __p)
-#define vidupq_x_n_u32(__a, __imm, __p) __arm_vidupq_x_n_u32(__a, __imm, __p)
-#define vidupq_x_wb_u8(__a, __imm, __p) __arm_vidupq_x_wb_u8(__a, __imm, __p)
-#define vidupq_x_wb_u16(__a, __imm, __p) __arm_vidupq_x_wb_u16(__a, __imm, __p)
-#define vidupq_x_wb_u32(__a, __imm, __p) __arm_vidupq_x_wb_u32(__a, __imm, __p)
-#define viwdupq_x_n_u8(__a, __b, __imm, __p) __arm_viwdupq_x_n_u8(__a, __b, __imm, __p)
-#define viwdupq_x_n_u16(__a, __b, __imm, __p) __arm_viwdupq_x_n_u16(__a, __b, __imm, __p)
-#define viwdupq_x_n_u32(__a, __b, __imm, __p) __arm_viwdupq_x_n_u32(__a, __b, __imm, __p)
-#define viwdupq_x_wb_u8(__a, __b, __imm, __p) __arm_viwdupq_x_wb_u8(__a, __b, __imm, __p)
-#define viwdupq_x_wb_u16(__a, __b, __imm, __p) __arm_viwdupq_x_wb_u16(__a, __b, __imm, __p)
-#define viwdupq_x_wb_u32(__a, __b, __imm, __p) __arm_viwdupq_x_wb_u32(__a, __b, __imm, __p)
-#define vbicq_x_s8(__a, __b, __p) __arm_vbicq_x_s8(__a, __b, __p)
-#define vbicq_x_s16(__a, __b, __p) __arm_vbicq_x_s16(__a, __b, __p)
-#define vbicq_x_s32(__a, __b, __p) __arm_vbicq_x_s32(__a, __b, __p)
-#define vbicq_x_u8(__a, __b, __p) __arm_vbicq_x_u8(__a, __b, __p)
-#define vbicq_x_u16(__a, __b, __p) __arm_vbicq_x_u16(__a, __b, __p)
-#define vbicq_x_u32(__a, __b, __p) __arm_vbicq_x_u32(__a, __b, __p)
-#define vornq_x_s8(__a, __b, __p) __arm_vornq_x_s8(__a, __b, __p)
-#define vornq_x_s16(__a, __b, __p) __arm_vornq_x_s16(__a, __b, __p)
-#define vornq_x_s32(__a, __b, __p) __arm_vornq_x_s32(__a, __b, __p)
-#define vornq_x_u8(__a, __b, __p) __arm_vornq_x_u8(__a, __b, __p)
-#define vornq_x_u16(__a, __b, __p) __arm_vornq_x_u16(__a, __b, __p)
-#define vornq_x_u32(__a, __b, __p) __arm_vornq_x_u32(__a, __b, __p)
-#define vcvtaq_x_s16_f16(__a, __p) __arm_vcvtaq_x_s16_f16(__a, __p)
-#define vcvtaq_x_s32_f32(__a, __p) __arm_vcvtaq_x_s32_f32(__a, __p)
-#define vcvtaq_x_u16_f16(__a, __p) __arm_vcvtaq_x_u16_f16(__a, __p)
-#define vcvtaq_x_u32_f32(__a, __p) __arm_vcvtaq_x_u32_f32(__a, __p)
-#define vcvtnq_x_s16_f16(__a, __p) __arm_vcvtnq_x_s16_f16(__a, __p)
-#define vcvtnq_x_s32_f32(__a, __p) __arm_vcvtnq_x_s32_f32(__a, __p)
-#define vcvtnq_x_u16_f16(__a, __p) __arm_vcvtnq_x_u16_f16(__a, __p)
-#define vcvtnq_x_u32_f32(__a, __p) __arm_vcvtnq_x_u32_f32(__a, __p)
-#define vcvtpq_x_s16_f16(__a, __p) __arm_vcvtpq_x_s16_f16(__a, __p)
-#define vcvtpq_x_s32_f32(__a, __p) __arm_vcvtpq_x_s32_f32(__a, __p)
-#define vcvtpq_x_u16_f16(__a, __p) __arm_vcvtpq_x_u16_f16(__a, __p)
-#define vcvtpq_x_u32_f32(__a, __p) __arm_vcvtpq_x_u32_f32(__a, __p)
-#define vcvtmq_x_s16_f16(__a, __p) __arm_vcvtmq_x_s16_f16(__a, __p)
-#define vcvtmq_x_s32_f32(__a, __p) __arm_vcvtmq_x_s32_f32(__a, __p)
-#define vcvtmq_x_u16_f16(__a, __p) __arm_vcvtmq_x_u16_f16(__a, __p)
-#define vcvtmq_x_u32_f32(__a, __p) __arm_vcvtmq_x_u32_f32(__a, __p)
-#define vcvtbq_x_f32_f16(__a, __p) __arm_vcvtbq_x_f32_f16(__a, __p)
-#define vcvttq_x_f32_f16(__a, __p) __arm_vcvttq_x_f32_f16(__a, __p)
-#define vcvtq_x_f16_u16(__a, __p) __arm_vcvtq_x_f16_u16(__a, __p)
-#define vcvtq_x_f16_s16(__a, __p) __arm_vcvtq_x_f16_s16(__a, __p)
-#define vcvtq_x_f32_s32(__a, __p) __arm_vcvtq_x_f32_s32(__a, __p)
-#define vcvtq_x_f32_u32(__a, __p) __arm_vcvtq_x_f32_u32(__a, __p)
-#define vcvtq_x_n_f16_s16(__a, __imm6, __p) __arm_vcvtq_x_n_f16_s16(__a, __imm6, __p)
-#define vcvtq_x_n_f16_u16(__a, __imm6, __p) __arm_vcvtq_x_n_f16_u16(__a, __imm6, __p)
-#define vcvtq_x_n_f32_s32(__a, __imm6, __p) __arm_vcvtq_x_n_f32_s32(__a, __imm6, __p)
-#define vcvtq_x_n_f32_u32(__a, __imm6, __p) __arm_vcvtq_x_n_f32_u32(__a, __imm6, __p)
-#define vcvtq_x_s16_f16(__a, __p) __arm_vcvtq_x_s16_f16(__a, __p)
-#define vcvtq_x_s32_f32(__a, __p) __arm_vcvtq_x_s32_f32(__a, __p)
-#define vcvtq_x_u16_f16(__a, __p) __arm_vcvtq_x_u16_f16(__a, __p)
-#define vcvtq_x_u32_f32(__a, __p) __arm_vcvtq_x_u32_f32(__a, __p)
-#define vcvtq_x_n_s16_f16(__a, __imm6, __p) __arm_vcvtq_x_n_s16_f16(__a, __imm6, __p)
-#define vcvtq_x_n_s32_f32(__a, __imm6, __p) __arm_vcvtq_x_n_s32_f32(__a, __imm6, __p)
-#define vcvtq_x_n_u16_f16(__a, __imm6, __p) __arm_vcvtq_x_n_u16_f16(__a, __imm6, __p)
-#define vcvtq_x_n_u32_f32(__a, __imm6, __p) __arm_vcvtq_x_n_u32_f32(__a, __imm6, __p)
-#define vbicq_x_f16(__a, __b, __p) __arm_vbicq_x_f16(__a, __b, __p)
-#define vbicq_x_f32(__a, __b, __p) __arm_vbicq_x_f32(__a, __b, __p)
-#define vornq_x_f16(__a, __b, __p) __arm_vornq_x_f16(__a, __b, __p)
-#define vornq_x_f32(__a, __b, __p) __arm_vornq_x_f32(__a, __b, __p)
-#define vadciq_s32(__a, __b, __carry_out) __arm_vadciq_s32(__a, __b, __carry_out)
-#define vadciq_u32(__a, __b, __carry_out) __arm_vadciq_u32(__a, __b, __carry_out)
-#define vadciq_m_s32(__inactive, __a, __b, __carry_out, __p) __arm_vadciq_m_s32(__inactive, __a, __b, __carry_out, __p)
-#define vadciq_m_u32(__inactive, __a, __b, __carry_out, __p) __arm_vadciq_m_u32(__inactive, __a, __b, __carry_out, __p)
-#define vadcq_s32(__a, __b, __carry) __arm_vadcq_s32(__a, __b, __carry)
-#define vadcq_u32(__a, __b, __carry) __arm_vadcq_u32(__a, __b, __carry)
-#define vadcq_m_s32(__inactive, __a, __b, __carry, __p) __arm_vadcq_m_s32(__inactive, __a, __b, __carry, __p)
-#define vadcq_m_u32(__inactive, __a, __b, __carry, __p) __arm_vadcq_m_u32(__inactive, __a, __b, __carry, __p)
-#define vsbciq_s32(__a, __b, __carry_out) __arm_vsbciq_s32(__a, __b, __carry_out)
-#define vsbciq_u32(__a, __b, __carry_out) __arm_vsbciq_u32(__a, __b, __carry_out)
-#define vsbciq_m_s32(__inactive, __a, __b, __carry_out, __p) __arm_vsbciq_m_s32(__inactive, __a, __b, __carry_out, __p)
-#define vsbciq_m_u32(__inactive, __a, __b, __carry_out, __p) __arm_vsbciq_m_u32(__inactive, __a, __b, __carry_out, __p)
-#define vsbcq_s32(__a, __b, __carry) __arm_vsbcq_s32(__a, __b, __carry)
-#define vsbcq_u32(__a, __b, __carry) __arm_vsbcq_u32(__a, __b, __carry)
-#define vsbcq_m_s32(__inactive, __a, __b, __carry, __p) __arm_vsbcq_m_s32(__inactive, __a, __b, __carry, __p)
-#define vsbcq_m_u32(__inactive, __a, __b, __carry, __p) __arm_vsbcq_m_u32(__inactive, __a, __b, __carry, __p)
-#define vst1q_p_u8(__addr, __value, __p) __arm_vst1q_p_u8(__addr, __value, __p)
-#define vst1q_p_s8(__addr, __value, __p) __arm_vst1q_p_s8(__addr, __value, __p)
#define vst2q_s8(__addr, __value) __arm_vst2q_s8(__addr, __value)
#define vst2q_u8(__addr, __value) __arm_vst2q_u8(__addr, __value)
-#define vld1q_z_u8(__base, __p) __arm_vld1q_z_u8(__base, __p)
-#define vld1q_z_s8(__base, __p) __arm_vld1q_z_s8(__base, __p)
#define vld2q_s8(__addr) __arm_vld2q_s8(__addr)
#define vld2q_u8(__addr) __arm_vld2q_u8(__addr)
#define vld4q_s8(__addr) __arm_vld4q_s8(__addr)
#define vld4q_u8(__addr) __arm_vld4q_u8(__addr)
-#define vst1q_p_u16(__addr, __value, __p) __arm_vst1q_p_u16(__addr, __value, __p)
-#define vst1q_p_s16(__addr, __value, __p) __arm_vst1q_p_s16(__addr, __value, __p)
#define vst2q_s16(__addr, __value) __arm_vst2q_s16(__addr, __value)
#define vst2q_u16(__addr, __value) __arm_vst2q_u16(__addr, __value)
-#define vld1q_z_u16(__base, __p) __arm_vld1q_z_u16(__base, __p)
-#define vld1q_z_s16(__base, __p) __arm_vld1q_z_s16(__base, __p)
#define vld2q_s16(__addr) __arm_vld2q_s16(__addr)
#define vld2q_u16(__addr) __arm_vld2q_u16(__addr)
#define vld4q_s16(__addr) __arm_vld4q_s16(__addr)
#define vld4q_u16(__addr) __arm_vld4q_u16(__addr)
-#define vst1q_p_u32(__addr, __value, __p) __arm_vst1q_p_u32(__addr, __value, __p)
-#define vst1q_p_s32(__addr, __value, __p) __arm_vst1q_p_s32(__addr, __value, __p)
#define vst2q_s32(__addr, __value) __arm_vst2q_s32(__addr, __value)
#define vst2q_u32(__addr, __value) __arm_vst2q_u32(__addr, __value)
-#define vld1q_z_u32(__base, __p) __arm_vld1q_z_u32(__base, __p)
-#define vld1q_z_s32(__base, __p) __arm_vld1q_z_s32(__base, __p)
#define vld2q_s32(__addr) __arm_vld2q_s32(__addr)
#define vld2q_u32(__addr) __arm_vld2q_u32(__addr)
#define vld4q_s32(__addr) __arm_vld4q_s32(__addr)
#define vld4q_u32(__addr) __arm_vld4q_u32(__addr)
#define vld4q_f16(__addr) __arm_vld4q_f16(__addr)
#define vld2q_f16(__addr) __arm_vld2q_f16(__addr)
-#define vld1q_z_f16(__base, __p) __arm_vld1q_z_f16(__base, __p)
#define vst2q_f16(__addr, __value) __arm_vst2q_f16(__addr, __value)
-#define vst1q_p_f16(__addr, __value, __p) __arm_vst1q_p_f16(__addr, __value, __p)
#define vld4q_f32(__addr) __arm_vld4q_f32(__addr)
#define vld2q_f32(__addr) __arm_vld2q_f32(__addr)
-#define vld1q_z_f32(__base, __p) __arm_vld1q_z_f32(__base, __p)
#define vst2q_f32(__addr, __value) __arm_vst2q_f32(__addr, __value)
-#define vst1q_p_f32(__addr, __value, __p) __arm_vst1q_p_f32(__addr, __value, __p)
#define vsetq_lane_f16(__a, __b, __idx) __arm_vsetq_lane_f16(__a, __b, __idx)
#define vsetq_lane_f32(__a, __b, __idx) __arm_vsetq_lane_f32(__a, __b, __idx)
#define vsetq_lane_s16(__a, __b, __idx) __arm_vsetq_lane_s16(__a, __b, __idx)
@@ -712,12 +309,6 @@
#define urshrl(__p0, __p1) __arm_urshrl(__p0, __p1)
#define lsll(__p0, __p1) __arm_lsll(__p0, __p1)
#define asrl(__p0, __p1) __arm_asrl(__p0, __p1)
-#define vshlcq_m_s8(__a, __b, __imm, __p) __arm_vshlcq_m_s8(__a, __b, __imm, __p)
-#define vshlcq_m_u8(__a, __b, __imm, __p) __arm_vshlcq_m_u8(__a, __b, __imm, __p)
-#define vshlcq_m_s16(__a, __b, __imm, __p) __arm_vshlcq_m_s16(__a, __b, __imm, __p)
-#define vshlcq_m_u16(__a, __b, __imm, __p) __arm_vshlcq_m_u16(__a, __b, __imm, __p)
-#define vshlcq_m_s32(__a, __b, __imm, __p) __arm_vshlcq_m_s32(__a, __b, __imm, __p)
-#define vshlcq_m_u32(__a, __b, __imm, __p) __arm_vshlcq_m_u32(__a, __b, __imm, __p)
#endif
/* For big-endian, GCC's vector indices are reversed within each 64 bits
@@ -788,345 +379,11 @@ __arm_vst4q_u32 (uint32_t * __addr, uint32x4x4_t __value)
__extension__ extern __inline mve_pred16_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp16q (uint32_t __a)
-{
- return __builtin_mve_vctp16qv8bi (__a);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp32q (uint32_t __a)
-{
- return __builtin_mve_vctp32qv4bi (__a);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp64q (uint32_t __a)
-{
- return __builtin_mve_vctp64qv2qi (__a);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp8q (uint32_t __a)
-{
- return __builtin_mve_vctp8qv16bi (__a);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vpnot (mve_pred16_t __a)
{
return __builtin_mve_vpnotv16bi (__a);
}
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
- return __builtin_mve_vornq_uv16qi (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_u8 (uint8x16_t __a, uint8x16_t __b)
-{
- return __builtin_mve_vbicq_uv16qi (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_s8 (int8x16_t __a, int8x16_t __b)
-{
- return __builtin_mve_vornq_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_s8 (int8x16_t __a, int8x16_t __b)
-{
- return __builtin_mve_vbicq_sv16qi (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
- return __builtin_mve_vornq_uv8hi (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_u16 (uint16x8_t __a, uint16x8_t __b)
-{
- return __builtin_mve_vbicq_uv8hi (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_s16 (int16x8_t __a, int16x8_t __b)
-{
- return __builtin_mve_vornq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_s16 (int16x8_t __a, int16x8_t __b)
-{
- return __builtin_mve_vbicq_sv8hi (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
- return __builtin_mve_vornq_uv4si (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_u32 (uint32x4_t __a, uint32x4_t __b)
-{
- return __builtin_mve_vbicq_uv4si (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_s32 (int32x4_t __a, int32x4_t __b)
-{
- return __builtin_mve_vornq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_s32 (int32x4_t __a, int32x4_t __b)
-{
- return __builtin_mve_vbicq_sv4si (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_n_u16 (uint16x8_t __a, const int __imm)
-{
- return __builtin_mve_vbicq_n_uv8hi (__a, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_n_s16 (int16x8_t __a, const int __imm)
-{
- return __builtin_mve_vbicq_n_sv8hi (__a, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_n_u32 (uint32x4_t __a, const int __imm)
-{
- return __builtin_mve_vbicq_n_uv4si (__a, __imm);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_n_s32 (int32x4_t __a, const int __imm)
-{
- return __builtin_mve_vbicq_n_sv4si (__a, __imm);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp8q_m (uint32_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vctp8q_mv16bi (__a, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp64q_m (uint32_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vctp64q_mv2qi (__a, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp32q_m (uint32_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vctp32q_mv4bi (__a, __p);
-}
-
-__extension__ extern __inline mve_pred16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vctp16q_m (uint32_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vctp16q_mv8bi (__a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_n_s16 (int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_n_sv8hi (__a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_n_s32 (int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_n_sv4si (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_n_u16 (uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_n_uv8hi (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_n_u32 (uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_n_uv4si (__a, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_s8 (int8x16_t __a, uint32_t * __b, const int __imm)
-{
- int8x16_t __res = __builtin_mve_vshlcq_vec_sv16qi (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_sv16qi (__a, *__b, __imm);
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_u8 (uint8x16_t __a, uint32_t * __b, const int __imm)
-{
- uint8x16_t __res = __builtin_mve_vshlcq_vec_uv16qi (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_uv16qi (__a, *__b, __imm);
- return __res;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_s16 (int16x8_t __a, uint32_t * __b, const int __imm)
-{
- int16x8_t __res = __builtin_mve_vshlcq_vec_sv8hi (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_sv8hi (__a, *__b, __imm);
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_u16 (uint16x8_t __a, uint32_t * __b, const int __imm)
-{
- uint16x8_t __res = __builtin_mve_vshlcq_vec_uv8hi (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_uv8hi (__a, *__b, __imm);
- return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_s32 (int32x4_t __a, uint32_t * __b, const int __imm)
-{
- int32x4_t __res = __builtin_mve_vshlcq_vec_sv4si (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_sv4si (__a, *__b, __imm);
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_u32 (uint32x4_t __a, uint32_t * __b, const int __imm)
-{
- uint32x4_t __res = __builtin_mve_vshlcq_vec_uv4si (__a, *__b, __imm);
- *__b = __builtin_mve_vshlcq_carry_uv4si (__a, *__b, __imm);
- return __res;
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_uv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_uv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_uv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m_s8 (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_sv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_sv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m_s16 (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_sv8hi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m_u8 (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_uv16qi (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_uv4si (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m_u16 (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_uv8hi (__inactive, __a, __b, __p);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrbq_scatter_offset_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -1171,48 +428,6 @@ __arm_vstrbq_scatter_offset_u16 (uint8_t * __base, uint16x8_t __offset, uint16x8
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_s8 (int8_t * __addr, int8x16_t __value)
-{
- __builtin_mve_vstrbq_sv16qi ((__builtin_neon_qi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_s32 (int8_t * __addr, int32x4_t __value)
-{
- __builtin_mve_vstrbq_sv4si ((__builtin_neon_qi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_s16 (int8_t * __addr, int16x8_t __value)
-{
- __builtin_mve_vstrbq_sv8hi ((__builtin_neon_qi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_u8 (uint8_t * __addr, uint8x16_t __value)
-{
- __builtin_mve_vstrbq_uv16qi ((__builtin_neon_qi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_u32 (uint8_t * __addr, uint32x4_t __value)
-{
- __builtin_mve_vstrbq_uv4si ((__builtin_neon_qi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_u16 (uint8_t * __addr, uint16x8_t __value)
-{
- __builtin_mve_vstrbq_uv8hi ((__builtin_neon_qi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrwq_scatter_base_s32 (uint32x4_t __addr, const int __offset, int32x4_t __value)
{
__builtin_mve_vstrwq_scatter_base_sv4si (__addr, __offset, __value);
@@ -1239,20 +454,6 @@ __arm_vldrbq_gather_offset_s8 (int8_t const * __base, uint8x16_t __offset)
return __builtin_mve_vldrbq_gather_offset_sv16qi ((__builtin_neon_qi *) __base, __offset);
}
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_s8 (int8_t const * __base)
-{
- return __builtin_mve_vldrbq_sv16qi ((__builtin_neon_qi *) __base);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_u8 (uint8_t const * __base)
-{
- return __builtin_mve_vldrbq_uv16qi ((__builtin_neon_qi *) __base);
-}
-
__extension__ extern __inline uint16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vldrbq_gather_offset_u16 (uint8_t const * __base, uint16x8_t __offset)
@@ -1267,20 +468,6 @@ __arm_vldrbq_gather_offset_s16 (int8_t const * __base, uint16x8_t __offset)
return __builtin_mve_vldrbq_gather_offset_sv8hi ((__builtin_neon_qi *) __base, __offset);
}
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_s16 (int8_t const * __base)
-{
- return __builtin_mve_vldrbq_sv8hi ((__builtin_neon_qi *) __base);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_u16 (uint8_t const * __base)
-{
- return __builtin_mve_vldrbq_uv8hi ((__builtin_neon_qi *) __base);
-}
-
__extension__ extern __inline uint32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vldrbq_gather_offset_u32 (uint8_t const * __base, uint32x4_t __offset)
@@ -1297,20 +484,6 @@ __arm_vldrbq_gather_offset_s32 (int8_t const * __base, uint32x4_t __offset)
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_s32 (int8_t const * __base)
-{
- return __builtin_mve_vldrbq_sv4si ((__builtin_neon_qi *) __base);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_u32 (uint8_t const * __base)
-{
- return __builtin_mve_vldrbq_uv4si ((__builtin_neon_qi *) __base);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vldrwq_gather_base_s32 (uint32x4_t __addr, const int __offset)
{
return __builtin_mve_vldrwq_gather_base_sv4si (__addr, __offset);
@@ -1325,48 +498,6 @@ __arm_vldrwq_gather_base_u32 (uint32x4_t __addr, const int __offset)
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_s8 (int8_t * __addr, int8x16_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrbq_p_sv16qi ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_s32 (int8_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrbq_p_sv4si ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_s16 (int8_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrbq_p_sv8hi ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_u8 (uint8_t * __addr, uint8x16_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrbq_p_uv16qi ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_u32 (uint8_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrbq_p_uv4si ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p_u16 (uint8_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrbq_p_uv8hi ((__builtin_neon_qi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrbq_scatter_offset_p_s8 (int8_t * __base, uint8x16_t __offset, int8x16_t __value, mve_pred16_t __p)
{
__builtin_mve_vstrbq_scatter_offset_p_sv16qi ((__builtin_neon_qi *) __base, __offset, __value, __p);
@@ -1463,48 +594,6 @@ __arm_vldrbq_gather_offset_z_u16 (uint8_t const * __base, uint16x8_t __offset, m
return __builtin_mve_vldrbq_gather_offset_z_uv8hi ((__builtin_neon_qi *) __base, __offset, __p);
}
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_s8 (int8_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrbq_z_sv16qi ((__builtin_neon_qi *) __base, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_s32 (int8_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrbq_z_sv4si ((__builtin_neon_qi *) __base, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_s16 (int8_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrbq_z_sv8hi ((__builtin_neon_qi *) __base, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_u8 (uint8_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrbq_z_uv16qi ((__builtin_neon_qi *) __base, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_u32 (uint8_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrbq_z_uv4si ((__builtin_neon_qi *) __base, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrbq_z_u16 (uint8_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrbq_z_uv8hi ((__builtin_neon_qi *) __base, __p);
-}
-
__extension__ extern __inline int32x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vldrwq_gather_base_z_s32 (uint32x4_t __addr, const int __offset, mve_pred16_t __p)
@@ -1631,91 +720,6 @@ __arm_vldrhq_gather_shifted_offset_z_u16 (uint16_t const * __base, uint16x8_t __
return __builtin_mve_vldrhq_gather_shifted_offset_z_uv8hi ((__builtin_neon_hi *) __base, __offset, __p);
}
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_s32 (int16_t const * __base)
-{
- return __builtin_mve_vldrhq_sv4si ((__builtin_neon_hi *) __base);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_s16 (int16_t const * __base)
-{
- return __builtin_mve_vldrhq_sv8hi ((__builtin_neon_hi *) __base);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_u32 (uint16_t const * __base)
-{
- return __builtin_mve_vldrhq_uv4si ((__builtin_neon_hi *) __base);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_u16 (uint16_t const * __base)
-{
- return __builtin_mve_vldrhq_uv8hi ((__builtin_neon_hi *) __base);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_z_s32 (int16_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrhq_z_sv4si ((__builtin_neon_hi *) __base, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_z_s16 (int16_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrhq_z_sv8hi ((__builtin_neon_hi *) __base, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_z_u32 (uint16_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrhq_z_uv4si ((__builtin_neon_hi *) __base, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_z_u16 (uint16_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrhq_z_uv8hi ((__builtin_neon_hi *) __base, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_s32 (int32_t const * __base)
-{
- return __builtin_mve_vldrwq_sv4si ((__builtin_neon_si *) __base);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_u32 (uint32_t const * __base)
-{
- return __builtin_mve_vldrwq_uv4si ((__builtin_neon_si *) __base);
-}
-
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_z_s32 (int32_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrwq_z_sv4si ((__builtin_neon_si *) __base, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_z_u32 (uint32_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrwq_z_uv4si ((__builtin_neon_si *) __base, __p);
-}
-
__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vldrdq_gather_base_s64 (uint64x2_t __addr, const int __offset)
@@ -1971,90 +975,6 @@ __arm_vstrhq_scatter_shifted_offset_p_u16 (uint16_t * __base, uint16x8_t __offse
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_s32 (int16_t * __addr, int32x4_t __value)
-{
- __builtin_mve_vstrhq_sv4si ((__builtin_neon_hi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_s16 (int16_t * __addr, int16x8_t __value)
-{
- __builtin_mve_vstrhq_sv8hi ((__builtin_neon_hi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_u32 (uint16_t * __addr, uint32x4_t __value)
-{
- __builtin_mve_vstrhq_uv4si ((__builtin_neon_hi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_u16 (uint16_t * __addr, uint16x8_t __value)
-{
- __builtin_mve_vstrhq_uv8hi ((__builtin_neon_hi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p_s32 (int16_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrhq_p_sv4si ((__builtin_neon_hi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p_s16 (int16_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrhq_p_sv8hi ((__builtin_neon_hi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p_u32 (uint16_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrhq_p_uv4si ((__builtin_neon_hi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p_u16 (uint16_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrhq_p_uv8hi ((__builtin_neon_hi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_s32 (int32_t * __addr, int32x4_t __value)
-{
- __builtin_mve_vstrwq_sv4si ((__builtin_neon_si *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_u32 (uint32_t * __addr, uint32x4_t __value)
-{
- __builtin_mve_vstrwq_uv4si ((__builtin_neon_si *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p_s32 (int32_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrwq_p_sv4si ((__builtin_neon_si *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p_u32 (uint32_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrwq_p_uv4si ((__builtin_neon_si *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrdq_scatter_base_p_s64 (uint64x2_t __addr, const int __offset, int64x2_t __value, mve_pred16_t __p)
{
__builtin_mve_vstrdq_scatter_base_p_sv2di (__addr, __offset, __value, __p);
@@ -2193,415 +1113,6 @@ __arm_vstrwq_scatter_shifted_offset_u32 (uint32_t * __base, uint32x4_t __offset,
__builtin_mve_vstrwq_scatter_shifted_offset_uv4si ((__builtin_neon_si *) __base, __offset, __value);
}
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m_n_u8 (uint8x16_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vddupq_m_n_uv16qi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m_n_u32 (uint32x4_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vddupq_m_n_uv4si (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m_n_u16 (uint16x8_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vddupq_m_n_uv8hi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m_wb_u8 (uint8x16_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- uint8x16_t __res = __builtin_mve_vddupq_m_n_uv16qi (__inactive, * __a, __imm, __p);
- *__a -= __imm * 16u;
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m_wb_u16 (uint16x8_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- uint16x8_t __res = __builtin_mve_vddupq_m_n_uv8hi (__inactive, *__a, __imm, __p);
- *__a -= __imm * 8u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m_wb_u32 (uint32x4_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- uint32x4_t __res = __builtin_mve_vddupq_m_n_uv4si (__inactive, *__a, __imm, __p);
- *__a -= __imm * 4u;
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_n_u8 (uint32_t __a, const int __imm)
-{
- return __builtin_mve_vddupq_n_uv16qi (__a, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_n_u32 (uint32_t __a, const int __imm)
-{
- return __builtin_mve_vddupq_n_uv4si (__a, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_n_u16 (uint32_t __a, const int __imm)
-{
- return __builtin_mve_vddupq_n_uv8hi (__a, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m_n_u8 (uint8x16_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_vdwdupq_m_n_uv16qi (__inactive, __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m_n_u32 (uint32x4_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_vdwdupq_m_n_uv4si (__inactive, __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m_n_u16 (uint16x8_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_vdwdupq_m_n_uv8hi (__inactive, __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m_wb_u8 (uint8x16_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint8x16_t __res = __builtin_mve_vdwdupq_m_n_uv16qi (__inactive, *__a, __c, __imm, __p);
- *__a = __builtin_mve_vdwdupq_m_wb_uv16qi (__inactive, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m_wb_u32 (uint32x4_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint32x4_t __res = __builtin_mve_vdwdupq_m_n_uv4si (__inactive, *__a, __c, __imm, __p);
- *__a = __builtin_mve_vdwdupq_m_wb_uv4si (__inactive, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m_wb_u16 (uint16x8_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint16x8_t __res = __builtin_mve_vdwdupq_m_n_uv8hi (__inactive, *__a, __c, __imm, __p);
- *__a = __builtin_mve_vdwdupq_m_wb_uv8hi (__inactive, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_n_u8 (uint32_t __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_vdwdupq_n_uv16qi (__a, __c, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_n_u32 (uint32_t __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_vdwdupq_n_uv4si (__a, __c, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_n_u16 (uint32_t __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_vdwdupq_n_uv8hi (__a, __c, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_wb_u8 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint8x16_t __res = __builtin_mve_vdwdupq_n_uv16qi (*__a, __c, __imm);
- *__a = __builtin_mve_vdwdupq_wb_uv16qi (*__a, __c, __imm);
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_wb_u32 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint32x4_t __res = __builtin_mve_vdwdupq_n_uv4si (*__a, __c, __imm);
- *__a = __builtin_mve_vdwdupq_wb_uv4si (*__a, __c, __imm);
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_wb_u16 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint16x8_t __res = __builtin_mve_vdwdupq_n_uv8hi (*__a, __c, __imm);
- *__a = __builtin_mve_vdwdupq_wb_uv8hi (*__a, __c, __imm);
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m_n_u8 (uint8x16_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vidupq_m_n_uv16qi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m_n_u32 (uint32x4_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vidupq_m_n_uv4si (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m_n_u16 (uint16x8_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vidupq_m_n_uv8hi (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_n_u8 (uint32_t __a, const int __imm)
-{
- return __builtin_mve_vidupq_n_uv16qi (__a, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m_wb_u8 (uint8x16_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- uint8x16_t __res = __builtin_mve_vidupq_m_n_uv16qi (__inactive, *__a, __imm, __p);
- *__a += __imm * 16u;
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m_wb_u16 (uint16x8_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- uint16x8_t __res = __builtin_mve_vidupq_m_n_uv8hi (__inactive, *__a, __imm, __p);
- *__a += __imm * 8u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m_wb_u32 (uint32x4_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- uint32x4_t __res = __builtin_mve_vidupq_m_n_uv4si (__inactive, *__a, __imm, __p);
- *__a += __imm * 4u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_n_u32 (uint32_t __a, const int __imm)
-{
- return __builtin_mve_vidupq_n_uv4si (__a, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_n_u16 (uint32_t __a, const int __imm)
-{
- return __builtin_mve_vidupq_n_uv8hi (__a, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_wb_u8 (uint32_t * __a, const int __imm)
-{
- uint8x16_t __res = __builtin_mve_vidupq_n_uv16qi (*__a, __imm);
- *__a += __imm * 16u;
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_wb_u16 (uint32_t * __a, const int __imm)
-{
- uint16x8_t __res = __builtin_mve_vidupq_n_uv8hi (*__a, __imm);
- *__a += __imm * 8u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_wb_u32 (uint32_t * __a, const int __imm)
-{
- uint32x4_t __res = __builtin_mve_vidupq_n_uv4si (*__a, __imm);
- *__a += __imm * 4u;
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_wb_u8 (uint32_t * __a, const int __imm)
-{
- uint8x16_t __res = __builtin_mve_vddupq_n_uv16qi (*__a, __imm);
- *__a -= __imm * 16u;
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_wb_u16 (uint32_t * __a, const int __imm)
-{
- uint16x8_t __res = __builtin_mve_vddupq_n_uv8hi (*__a, __imm);
- *__a -= __imm * 8u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_wb_u32 (uint32_t * __a, const int __imm)
-{
- uint32x4_t __res = __builtin_mve_vddupq_n_uv4si (*__a, __imm);
- *__a -= __imm * 4u;
- return __res;
-}
-
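/* The legacy vidupq/vddupq wrappers above compute lane i as the start value
   plus (or, for the "d" forms, minus) i * __imm, and the "_wb" variants then
   write the start value back advanced by __imm times the number of lanes
   (16, 8 or 4; the immediate itself is one of 1, 2, 4 or 8).  A host-side
   scalar sketch of that arithmetic; the names below are illustrative and not
   part of arm_mve.h.  */
#include <stdint.h>

static void
vidup_model (uint32_t *base, unsigned imm, int lanes, int decrement,
             uint32_t out[])
{
  for (int i = 0; i < lanes; i++)
    out[i] = decrement ? *base - (uint32_t) i * imm
                       : *base + (uint32_t) i * imm;
  /* Writeback mirrors "*__a += __imm * 16u" / "*__a -= __imm * 16u".  */
  *base = decrement ? *base - (uint32_t) lanes * imm
                    : *base + (uint32_t) lanes * imm;
}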
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m_n_u8 (uint8x16_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_viwdupq_m_n_uv16qi (__inactive, __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m_n_u32 (uint32x4_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_viwdupq_m_n_uv4si (__inactive, __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m_n_u16 (uint16x8_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_viwdupq_m_n_uv8hi (__inactive, __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m_wb_u8 (uint8x16_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint8x16_t __res = __builtin_mve_viwdupq_m_n_uv16qi (__inactive, *__a, __c, __imm, __p);
- *__a = __builtin_mve_viwdupq_m_wb_uv16qi (__inactive, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m_wb_u32 (uint32x4_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint32x4_t __res = __builtin_mve_viwdupq_m_n_uv4si (__inactive, *__a, __c, __imm, __p);
- *__a = __builtin_mve_viwdupq_m_wb_uv4si (__inactive, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m_wb_u16 (uint16x8_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint16x8_t __res = __builtin_mve_viwdupq_m_n_uv8hi (__inactive, *__a, __c, __imm, __p);
- *__a = __builtin_mve_viwdupq_m_wb_uv8hi (__inactive, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_n_u8 (uint32_t __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_viwdupq_n_uv16qi (__a, __c, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_n_u32 (uint32_t __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_viwdupq_n_uv4si (__a, __c, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_n_u16 (uint32_t __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_viwdupq_n_uv8hi (__a, __c, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_wb_u8 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint8x16_t __res = __builtin_mve_viwdupq_n_uv16qi (*__a, __c, __imm);
- *__a = __builtin_mve_viwdupq_wb_uv16qi (*__a, __c, __imm);
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_wb_u32 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint32x4_t __res = __builtin_mve_viwdupq_n_uv4si (*__a, __c, __imm);
- *__a = __builtin_mve_viwdupq_wb_uv4si (*__a, __c, __imm);
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_wb_u16 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint16x8_t __res = __builtin_mve_viwdupq_n_uv8hi (*__a, __c, __imm);
- *__a = __builtin_mve_viwdupq_wb_uv8hi (*__a, __c, __imm);
- return __res;
-}
-
-
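/* The legacy v[id]wdupq wrappers above pass the wrap limit __b to the
   builtins packed into the upper 32 bits of a 64-bit operand, with the start
   value travelling as a separate scalar argument.  A minimal host-side sketch
   of that packing convention (helper names are illustrative only):  */
#include <stdint.h>

static inline uint64_t
pack_wrap (uint32_t wrap)
{
  return (uint64_t) wrap << 32;     /* mirrors "((uint64_t) __b) << 32" */
}

static inline uint32_t
unpack_wrap (uint64_t packed)
{
  return (uint32_t) (packed >> 32); /* recover the wrap limit */
}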
__extension__ extern __inline int64x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vldrdq_gather_base_wb_s64 (uint64x2_t * __addr, const int __offset)
@@ -2738,472 +1249,6 @@ __arm_vstrwq_scatter_base_wb_u32 (uint32x4_t * __addr, const int __offset, uint3
*__addr = __builtin_mve_vstrwq_scatter_base_wb_uv4si (*__addr, __offset, __value);
}
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_n_u8 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vddupq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_n_u16 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vddupq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_n_u32 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vddupq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_wb_u8 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- uint8x16_t __arg1 = __arm_vuninitializedq_u8 ();
- uint8x16_t __res = __builtin_mve_vddupq_m_n_uv16qi (__arg1, * __a, __imm, __p);
- *__a -= __imm * 16u;
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_wb_u16 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- uint16x8_t __arg1 = __arm_vuninitializedq_u16 ();
- uint16x8_t __res = __builtin_mve_vddupq_m_n_uv8hi (__arg1, *__a, __imm, __p);
- *__a -= __imm * 8u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_wb_u32 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- uint32x4_t __arg1 = __arm_vuninitializedq_u32 ();
- uint32x4_t __res = __builtin_mve_vddupq_m_n_uv4si (__arg1, *__a, __imm, __p);
- *__a -= __imm * 4u;
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_n_u8 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_vdwdupq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_n_u16 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_vdwdupq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_n_u32 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_vdwdupq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_wb_u8 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint8x16_t __arg1 = __arm_vuninitializedq_u8 ();
- uint8x16_t __res = __builtin_mve_vdwdupq_m_n_uv16qi (__arg1, *__a, __c, __imm, __p);
- *__a = __builtin_mve_vdwdupq_m_wb_uv16qi (__arg1, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_wb_u16 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint16x8_t __arg1 = __arm_vuninitializedq_u16 ();
- uint16x8_t __res = __builtin_mve_vdwdupq_m_n_uv8hi (__arg1, *__a, __c, __imm, __p);
- *__a = __builtin_mve_vdwdupq_m_wb_uv8hi (__arg1, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_wb_u32 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint32x4_t __arg1 = __arm_vuninitializedq_u32 ();
- uint32x4_t __res = __builtin_mve_vdwdupq_m_n_uv4si (__arg1, *__a, __c, __imm, __p);
- *__a = __builtin_mve_vdwdupq_m_wb_uv4si (__arg1, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_n_u8 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vidupq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_n_u16 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vidupq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_n_u32 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __builtin_mve_vidupq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_wb_u8 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- uint8x16_t __arg1 = __arm_vuninitializedq_u8 ();
- uint8x16_t __res = __builtin_mve_vidupq_m_n_uv16qi (__arg1, *__a, __imm, __p);
- *__a += __imm * 16u;
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_wb_u16 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- uint16x8_t __arg1 = __arm_vuninitializedq_u16 ();
- uint16x8_t __res = __builtin_mve_vidupq_m_n_uv8hi (__arg1, *__a, __imm, __p);
- *__a += __imm * 8u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_wb_u32 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- uint32x4_t __arg1 = __arm_vuninitializedq_u32 ();
- uint32x4_t __res = __builtin_mve_vidupq_m_n_uv4si (__arg1, *__a, __imm, __p);
- *__a += __imm * 4u;
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_n_u8 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_viwdupq_m_n_uv16qi (__arm_vuninitializedq_u8 (), __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_n_u16 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_viwdupq_m_n_uv8hi (__arm_vuninitializedq_u16 (), __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_n_u32 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- return __builtin_mve_viwdupq_m_n_uv4si (__arm_vuninitializedq_u32 (), __a, __c, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_wb_u8 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint8x16_t __arg1 = __arm_vuninitializedq_u8 ();
- uint8x16_t __res = __builtin_mve_viwdupq_m_n_uv16qi (__arg1, *__a, __c, __imm, __p);
- *__a = __builtin_mve_viwdupq_m_wb_uv16qi (__arg1, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_wb_u16 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint16x8_t __arg1 = __arm_vuninitializedq_u16 ();
- uint16x8_t __res = __builtin_mve_viwdupq_m_n_uv8hi (__arg1, *__a, __c, __imm, __p);
- *__a = __builtin_mve_viwdupq_m_wb_uv8hi (__arg1, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_wb_u32 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- uint64_t __c = ((uint64_t) __b) << 32;
- uint32x4_t __arg1 = __arm_vuninitializedq_u32 ();
- uint32x4_t __res = __builtin_mve_viwdupq_m_n_uv4si (__arg1, *__a, __c, __imm, __p);
- *__a = __builtin_mve_viwdupq_m_wb_uv4si (__arg1, *__a, __c, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x_s8 (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_sv16qi (__arm_vuninitializedq_s8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x_s16 (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x_s32 (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x_u8 (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_uv16qi (__arm_vuninitializedq_u8 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x_u16 (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x_u32 (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __b, __p);
-}
-
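/* The _x ("don't care") forms above simply pass an uninitialized vector as
   the inactive operand of the corresponding _m builtin, so lanes with a
   clear predicate bit hold unspecified values.  A scalar sketch of the
   underlying merging for the 8-bit BIC case, assuming one predicate bit per
   byte lane (names illustrative, not part of arm_mve.h):  */
#include <stdint.h>

static void
vbicq_m_model (uint8_t res[16], const uint8_t inactive[16],
               const uint8_t a[16], const uint8_t b[16], uint16_t p)
{
  for (int lane = 0; lane < 16; lane++)
    res[lane] = ((p >> lane) & 1)
                ? (uint8_t) (a[lane] & ~b[lane]) /* BIC: a AND NOT b */
                : inactive[lane];                /* _x: don't-care lane */
}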
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadciq_s32 (int32x4_t __a, int32x4_t __b, unsigned * __carry_out)
-{
- int32x4_t __res = __builtin_mve_vadciq_sv4si (__a, __b);
- *__carry_out = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadciq_u32 (uint32x4_t __a, uint32x4_t __b, unsigned * __carry_out)
-{
- uint32x4_t __res = __builtin_mve_vadciq_uv4si (__a, __b);
- *__carry_out = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadciq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, unsigned * __carry_out, mve_pred16_t __p)
-{
- int32x4_t __res = __builtin_mve_vadciq_m_sv4si (__inactive, __a, __b, __p);
- *__carry_out = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadciq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, unsigned * __carry_out, mve_pred16_t __p)
-{
- uint32x4_t __res = __builtin_mve_vadciq_m_uv4si (__inactive, __a, __b, __p);
- *__carry_out = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadcq_s32 (int32x4_t __a, int32x4_t __b, unsigned * __carry)
-{
- __builtin_arm_set_fpscr_nzcvqc((__builtin_arm_get_fpscr_nzcvqc () & ~0x20000000u) | ((*__carry & 0x1u) << 29));
- int32x4_t __res = __builtin_mve_vadcq_sv4si (__a, __b);
- *__carry = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadcq_u32 (uint32x4_t __a, uint32x4_t __b, unsigned * __carry)
-{
- __builtin_arm_set_fpscr_nzcvqc((__builtin_arm_get_fpscr_nzcvqc () & ~0x20000000u) | ((*__carry & 0x1u) << 29));
- uint32x4_t __res = __builtin_mve_vadcq_uv4si (__a, __b);
- *__carry = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadcq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, unsigned * __carry, mve_pred16_t __p)
-{
- __builtin_arm_set_fpscr_nzcvqc((__builtin_arm_get_fpscr_nzcvqc () & ~0x20000000u) | ((*__carry & 0x1u) << 29));
- int32x4_t __res = __builtin_mve_vadcq_m_sv4si (__inactive, __a, __b, __p);
- *__carry = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadcq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, unsigned * __carry, mve_pred16_t __p)
-{
- __builtin_arm_set_fpscr_nzcvqc((__builtin_arm_get_fpscr_nzcvqc () & ~0x20000000u) | ((*__carry & 0x1u) << 29));
- uint32x4_t __res = __builtin_mve_vadcq_m_uv4si (__inactive, __a, __b, __p);
- *__carry = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbciq_s32 (int32x4_t __a, int32x4_t __b, unsigned * __carry_out)
-{
- int32x4_t __res = __builtin_mve_vsbciq_sv4si (__a, __b);
- *__carry_out = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbciq_u32 (uint32x4_t __a, uint32x4_t __b, unsigned * __carry_out)
-{
- uint32x4_t __res = __builtin_mve_vsbciq_uv4si (__a, __b);
- *__carry_out = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbciq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, unsigned * __carry_out, mve_pred16_t __p)
-{
- int32x4_t __res = __builtin_mve_vsbciq_m_sv4si (__inactive, __a, __b, __p);
- *__carry_out = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbciq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, unsigned * __carry_out, mve_pred16_t __p)
-{
- uint32x4_t __res = __builtin_mve_vsbciq_m_uv4si (__inactive, __a, __b, __p);
- *__carry_out = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbcq_s32 (int32x4_t __a, int32x4_t __b, unsigned * __carry)
-{
- __builtin_arm_set_fpscr_nzcvqc((__builtin_arm_get_fpscr_nzcvqc () & ~0x20000000u) | ((*__carry & 0x1u) << 29));
- int32x4_t __res = __builtin_mve_vsbcq_sv4si (__a, __b);
- *__carry = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbcq_u32 (uint32x4_t __a, uint32x4_t __b, unsigned * __carry)
-{
- __builtin_arm_set_fpscr_nzcvqc((__builtin_arm_get_fpscr_nzcvqc () & ~0x20000000u) | ((*__carry & 0x1u) << 29));
- uint32x4_t __res = __builtin_mve_vsbcq_uv4si (__a, __b);
- *__carry = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbcq_m_s32 (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, unsigned * __carry, mve_pred16_t __p)
-{
- __builtin_arm_set_fpscr_nzcvqc((__builtin_arm_get_fpscr_nzcvqc () & ~0x20000000u) | ((*__carry & 0x1u) << 29));
- int32x4_t __res = __builtin_mve_vsbcq_m_sv4si (__inactive, __a, __b, __p);
- *__carry = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbcq_m_u32 (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, unsigned * __carry, mve_pred16_t __p)
-{
- __builtin_arm_set_fpscr_nzcvqc((__builtin_arm_get_fpscr_nzcvqc () & ~0x20000000u) | ((*__carry & 0x1u) << 29));
- uint32x4_t __res = __builtin_mve_vsbcq_m_uv4si (__inactive, __a, __b, __p);
- *__carry = (__builtin_arm_get_fpscr_nzcvqc () >> 29) & 0x1u;
- return __res;
-}
-
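/* The vadcq/vsbcq wrappers above stage the carry through bit 29 of
   FPSCR_nzcvqc: in the usual NZCV layout N is bit 31, Z bit 30, C bit 29 and
   V bit 28, so 0x20000000u is just 1 << 29.  A host-side sketch of the bit
   manipulation they perform (helper names are illustrative):  */
#include <stdint.h>

static inline uint32_t
insert_carry (uint32_t fpscr, unsigned carry)
{
  /* Clear the C flag, then insert the incoming carry into bit 29.  */
  return (fpscr & ~0x20000000u) | ((carry & 0x1u) << 29);
}

static inline unsigned
extract_carry (uint32_t fpscr)
{
  /* Read the carry produced by VADC/VSBC back out of bit 29.  */
  return (fpscr >> 29) & 0x1u;
}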
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_u8 (uint8_t * __addr, uint8x16_t __value, mve_pred16_t __p)
-{
- return __arm_vstrbq_p_u8 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_s8 (int8_t * __addr, int8x16_t __value, mve_pred16_t __p)
-{
- return __arm_vstrbq_p_s8 (__addr, __value, __p);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q_s8 (int8_t * __addr, int8x16x2_t __value)
@@ -3222,20 +1267,6 @@ __arm_vst2q_u8 (uint8_t * __addr, uint8x16x2_t __value)
__builtin_mve_vst2qv16qi ((__builtin_neon_qi *) __addr, __rv.__o);
}
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_u8 (uint8_t const *__base, mve_pred16_t __p)
-{
- return __arm_vldrbq_z_u8 ( __base, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_s8 (int8_t const *__base, mve_pred16_t __p)
-{
- return __arm_vldrbq_z_s8 ( __base, __p);
-}
-
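/* The vld1q_z/vst1q_p wrappers above (now provided through the builtins
   framework instead of these inline definitions) are the usual building
   blocks of tail-predicated loops.  A usage sketch, assuming the polymorphic
   vctp8q/vld1q_z/vst1q_p intrinsics from <arm_mve.h> and an MVE-enabled
   target such as -mcpu=cortex-m55:  */
#include <arm_mve.h>
#include <stdint.h>

void
copy_u8 (uint8_t *dst, const uint8_t *src, int32_t n)
{
  for (; n > 0; n -= 16, src += 16, dst += 16)
    {
      mve_pred16_t p = vctp8q ((uint32_t) n); /* first min (n, 16) lanes active */
      uint8x16_t v = vld1q_z (src, p);        /* inactive lanes load as zero */
      vst1q_p (dst, v, p);                    /* inactive lanes are not stored */
    }
}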
__extension__ extern __inline int8x16x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld2q_s8 (int8_t const * __addr)
@@ -3274,20 +1305,6 @@ __arm_vld4q_u8 (uint8_t const * __addr)
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_u16 (uint16_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
- return __arm_vstrhq_p_u16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_s16 (int16_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
- return __arm_vstrhq_p_s16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q_s16 (int16_t * __addr, int16x8x2_t __value)
{
union { int16x8x2_t __i; __builtin_neon_oi __o; } __rv;
@@ -3304,20 +1321,6 @@ __arm_vst2q_u16 (uint16_t * __addr, uint16x8x2_t __value)
__builtin_mve_vst2qv8hi ((__builtin_neon_hi *) __addr, __rv.__o);
}
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_u16 (uint16_t const *__base, mve_pred16_t __p)
-{
- return __arm_vldrhq_z_u16 ( __base, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_s16 (int16_t const *__base, mve_pred16_t __p)
-{
- return __arm_vldrhq_z_s16 ( __base, __p);
-}
-
__extension__ extern __inline int16x8x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld2q_s16 (int16_t const * __addr)
@@ -3356,20 +1359,6 @@ __arm_vld4q_u16 (uint16_t const * __addr)
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_u32 (uint32_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- return __arm_vstrwq_p_u32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_s32 (int32_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- return __arm_vstrwq_p_s32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q_s32 (int32_t * __addr, int32x4x2_t __value)
{
union { int32x4x2_t __i; __builtin_neon_oi __o; } __rv;
@@ -3386,20 +1375,6 @@ __arm_vst2q_u32 (uint32_t * __addr, uint32x4x2_t __value)
__builtin_mve_vst2qv4si ((__builtin_neon_si *) __addr, __rv.__o);
}
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_u32 (uint32_t const *__base, mve_pred16_t __p)
-{
- return __arm_vldrwq_z_u32 ( __base, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_s32 (int32_t const *__base, mve_pred16_t __p)
-{
- return __arm_vldrwq_z_s32 ( __base, __p);
-}
-
__extension__ extern __inline int32x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld2q_s32 (int32_t const * __addr)
@@ -3684,60 +1659,6 @@ __arm_srshr (int32_t value, const int shift)
return __builtin_mve_srshr_si (value, shift);
}
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_s8 (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- int8x16_t __res = __builtin_mve_vshlcq_m_vec_sv16qi (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_sv16qi (__a, *__b, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_u8 (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- uint8x16_t __res = __builtin_mve_vshlcq_m_vec_uv16qi (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_uv16qi (__a, *__b, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_s16 (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- int16x8_t __res = __builtin_mve_vshlcq_m_vec_sv8hi (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_sv8hi (__a, *__b, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_u16 (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- uint16x8_t __res = __builtin_mve_vshlcq_m_vec_uv8hi (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_uv8hi (__a, *__b, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_s32 (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- int32x4_t __res = __builtin_mve_vshlcq_m_vec_sv4si (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_sv4si (__a, *__b, __imm, __p);
- return __res;
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m_u32 (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- uint32x4_t __res = __builtin_mve_vshlcq_m_vec_uv4si (__a, *__b, __imm, __p);
- *__b = __builtin_mve_vshlcq_m_carry_uv4si (__a, *__b, __imm, __p);
- return __res;
-}
-
#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */
__extension__ extern __inline void
@@ -3758,595 +1679,6 @@ __arm_vst4q_f32 (float32_t * __addr, float32x4x4_t __value)
__builtin_mve_vst4qv4sf (__addr, __rv.__o);
}
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_f32_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvttq_f32_f16v4sf (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_f32_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvtbq_f32_f16v4sf (__a);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_f16_s16 (int16x8_t __a)
-{
- return __builtin_mve_vcvtq_to_f_sv8hf (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_f32_s32 (int32x4_t __a)
-{
- return __builtin_mve_vcvtq_to_f_sv4sf (__a);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_f16_u16 (uint16x8_t __a)
-{
- return __builtin_mve_vcvtq_to_f_uv8hf (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_f32_u32 (uint32x4_t __a)
-{
- return __builtin_mve_vcvtq_to_f_uv4sf (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_s16_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvtq_from_f_sv8hi (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_s32_f32 (float32x4_t __a)
-{
- return __builtin_mve_vcvtq_from_f_sv4si (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_u16_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvtq_from_f_uv8hi (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_u32_f32 (float32x4_t __a)
-{
- return __builtin_mve_vcvtq_from_f_uv4si (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_u16_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvtpq_uv8hi (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_u32_f32 (float32x4_t __a)
-{
- return __builtin_mve_vcvtpq_uv4si (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_u16_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvtnq_uv8hi (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_u32_f32 (float32x4_t __a)
-{
- return __builtin_mve_vcvtnq_uv4si (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_u16_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvtmq_uv8hi (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_u32_f32 (float32x4_t __a)
-{
- return __builtin_mve_vcvtmq_uv4si (__a);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_u16_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvtaq_uv8hi (__a);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_u32_f32 (float32x4_t __a)
-{
- return __builtin_mve_vcvtaq_uv4si (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_s16_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvtaq_sv8hi (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_s32_f32 (float32x4_t __a)
-{
- return __builtin_mve_vcvtaq_sv4si (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_s16_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvtnq_sv8hi (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_s32_f32 (float32x4_t __a)
-{
- return __builtin_mve_vcvtnq_sv4si (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_s16_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvtpq_sv8hi (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_s32_f32 (float32x4_t __a)
-{
- return __builtin_mve_vcvtpq_sv4si (__a);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_s16_f16 (float16x8_t __a)
-{
- return __builtin_mve_vcvtmq_sv8hi (__a);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_s32_f32 (float32x4_t __a)
-{
- return __builtin_mve_vcvtmq_sv4si (__a);
-}
-
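/* The vcvt{a,n,p,m}q conversions above differ only in rounding mode:
   A rounds to nearest with ties away from zero, N to nearest even,
   P towards +infinity and M towards -infinity.  A host-side scalar sketch of
   the same roundings using libm, leaving saturation and out-of-range inputs
   aside (function names are illustrative):  */
#include <math.h>
#include <stdint.h>

static int32_t cvta_model (float x) { return (int32_t) roundf (x); } /* ties away from zero */
static int32_t cvtn_model (float x) { return (int32_t) rintf (x); }  /* nearest even (default FE_TONEAREST) */
static int32_t cvtp_model (float x) { return (int32_t) ceilf (x); }  /* towards +infinity */
static int32_t cvtm_model (float x) { return (int32_t) floorf (x); } /* towards -infinity */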
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n_f16_s16 (int16x8_t __a, const int __imm6)
-{
- return __builtin_mve_vcvtq_n_to_f_sv8hf (__a, __imm6);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n_f32_s32 (int32x4_t __a, const int __imm6)
-{
- return __builtin_mve_vcvtq_n_to_f_sv4sf (__a, __imm6);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n_f16_u16 (uint16x8_t __a, const int __imm6)
-{
- return __builtin_mve_vcvtq_n_to_f_uv8hf (__a, __imm6);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n_f32_u32 (uint32x4_t __a, const int __imm6)
-{
- return __builtin_mve_vcvtq_n_to_f_uv4sf (__a, __imm6);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n_s16_f16 (float16x8_t __a, const int __imm6)
-{
- return __builtin_mve_vcvtq_n_from_f_sv8hi (__a, __imm6);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n_s32_f32 (float32x4_t __a, const int __imm6)
-{
- return __builtin_mve_vcvtq_n_from_f_sv4si (__a, __imm6);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n_u16_f16 (float16x8_t __a, const int __imm6)
-{
- return __builtin_mve_vcvtq_n_from_f_uv8hi (__a, __imm6);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n_u32_f32 (float32x4_t __a, const int __imm6)
-{
- return __builtin_mve_vcvtq_n_from_f_uv4si (__a, __imm6);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_f16 (float16x8_t __a, float16x8_t __b)
-{
- return __builtin_mve_vornq_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_f16 (float16x8_t __a, float16x8_t __b)
-{
- return __builtin_mve_vbicq_fv8hf (__a, __b);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_f32 (float32x4_t __a, float32x4_t __b)
-{
- return __builtin_mve_vornq_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_f32 (float32x4_t __a, float32x4_t __b)
-{
- return __builtin_mve_vbicq_fv4sf (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_f16_f32 (float16x8_t __a, float32x4_t __b)
-{
- return __builtin_mve_vcvttq_f16_f32v8hf (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_f16_f32 (float16x8_t __a, float32x4_t __b)
-{
- return __builtin_mve_vcvtbq_f16_f32v8hf (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtaq_m_sv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_m_u16_f16 (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtaq_m_uv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_m_s32_f32 (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtaq_m_sv4si (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_m_u32_f32 (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtaq_m_uv4si (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_f16_s16 (float16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_to_f_sv8hf (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_f16_u16 (float16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_to_f_uv8hf (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_f32_s32 (float32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_to_f_sv4sf (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_f32_u32 (float32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_to_f_uv4sf (__inactive, __a, __p);
-}
-
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_m_f16_f32 (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtbq_m_f16_f32v8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtbq_m_f32_f16v4sf (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_m_f16_f32 (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vcvttq_m_f16_f32v8hf (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_m_f32_f16 (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvttq_m_f32_f16v4sf (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtmq_m_sv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtnq_m_sv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtpq_m_sv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_s16_f16 (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_from_f_sv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_m_u16_f16 (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtmq_m_uv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_m_u16_f16 (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtnq_m_uv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_m_u16_f16 (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtpq_m_uv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_u16_f16 (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_from_f_uv8hi (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_m_s32_f32 (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtmq_m_sv4si (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_m_s32_f32 (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtnq_m_sv4si (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_m_s32_f32 (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtpq_m_sv4si (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_s32_f32 (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_from_f_sv4si (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_m_u32_f32 (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtmq_m_uv4si (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_m_u32_f32 (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtnq_m_uv4si (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_m_u32_f32 (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtpq_m_uv4si (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_u32_f32 (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_from_f_uv4si (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n_f16_u16 (float16x8_t __inactive, uint16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_to_f_uv8hf (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n_f16_s16 (float16x8_t __inactive, int16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_to_f_sv8hf (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n_f32_u32 (float32x4_t __inactive, uint32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_to_f_uv4sf (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n_f32_s32 (float32x4_t __inactive, int32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_to_f_sv4sf (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_fv4sf (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_fv8hf (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n_s32_f32 (int32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_from_f_sv4si (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n_s16_f16 (int16x8_t __inactive, float16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_from_f_sv8hi (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n_u32_f32 (uint32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_from_f_uv4si (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n_u16_f16 (uint16x8_t __inactive, float16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_from_f_uv8hi (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m_f32 (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_fv4sf (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m_f16 (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_fv8hf (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_f32 (float32_t const * __base)
-{
- return __builtin_mve_vldrwq_fv4sf((__builtin_neon_si *) __base);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrwq_z_f32 (float32_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrwq_z_fv4sf((__builtin_neon_si *) __base, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_z_f16 (float16_t const * __base, mve_pred16_t __p)
-{
- return __builtin_mve_vldrhq_z_fv8hf((__builtin_neon_hi *) __base, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vldrhq_f16 (float16_t const * __base)
-{
- return __builtin_mve_vldrhq_fv8hf((__builtin_neon_hi *) __base);
-}
-
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vldrhq_gather_offset_f16 (float16_t const * __base, uint16x8_t __offset)
@@ -4419,34 +1751,6 @@ __arm_vldrwq_gather_shifted_offset_z_f32 (float32_t const * __base, uint32x4_t _
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p_f32 (float32_t * __addr, float32x4_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrwq_p_fv4sf ((__builtin_neon_si *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_f32 (float32_t * __addr, float32x4_t __value)
-{
- __builtin_mve_vstrwq_fv4sf ((__builtin_neon_si *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_f16 (float16_t * __addr, float16x8_t __value)
-{
- __builtin_mve_vstrhq_fv8hf ((__builtin_neon_hi *) __addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p_f16 (float16_t * __addr, float16x8_t __value, mve_pred16_t __p)
-{
- __builtin_mve_vstrhq_p_fv8hf ((__builtin_neon_hi *) __addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrhq_scatter_offset_f16 (float16_t * __base, uint16x8_t __offset, float16x8_t __value)
{
__builtin_mve_vstrhq_scatter_offset_fv8hf ((__builtin_neon_hi *) __base, __offset, __value);
@@ -4549,272 +1853,6 @@ __arm_vstrwq_scatter_base_wb_p_f32 (uint32x4_t * __addr, const int __offset, flo
*__addr = __builtin_mve_vstrwq_scatter_base_wb_p_fv4sf (*__addr, __offset, __value, __p);
}
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_x_s16_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtaq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_x_s32_f32 (float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtaq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_x_u16_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtaq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_x_u32_f32 (float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtaq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_x_s16_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtnq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_x_s32_f32 (float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtnq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_x_u16_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtnq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_x_u32_f32 (float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtnq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_x_s16_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtpq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_x_s32_f32 (float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtpq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_x_u16_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtpq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_x_u32_f32 (float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtpq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_x_s16_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtmq_m_sv8hi (__arm_vuninitializedq_s16 (), __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_x_s32_f32 (float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtmq_m_sv4si (__arm_vuninitializedq_s32 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_x_u16_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtmq_m_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_x_u32_f32 (float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtmq_m_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_x_f32_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtbq_m_f32_f16v4sf (__arm_vuninitializedq_f32 (), __a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_x_f32_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvttq_m_f32_f16v4sf (__arm_vuninitializedq_f32 (), __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_f16_u16 (uint16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_to_f_uv8hf (__arm_vuninitializedq_f16 (), __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_f16_s16 (int16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_to_f_sv8hf (__arm_vuninitializedq_f16 (), __a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_f32_s32 (int32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_to_f_sv4sf (__arm_vuninitializedq_f32 (), __a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_f32_u32 (uint32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_to_f_uv4sf (__arm_vuninitializedq_f32 (), __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n_f16_s16 (int16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_to_f_sv8hf (__arm_vuninitializedq_f16 (), __a, __imm6, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n_f16_u16 (uint16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_to_f_uv8hf (__arm_vuninitializedq_f16 (), __a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n_f32_s32 (int32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_to_f_sv4sf (__arm_vuninitializedq_f32 (), __a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n_f32_u32 (uint32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_to_f_uv4sf (__arm_vuninitializedq_f32 (), __a, __imm6, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_s16_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_from_f_sv8hi (__arm_vuninitializedq_s16 (), __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_s32_f32 (float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_from_f_sv4si (__arm_vuninitializedq_s32 (), __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_u16_f16 (float16x8_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_from_f_uv8hi (__arm_vuninitializedq_u16 (), __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_u32_f32 (float32x4_t __a, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_from_f_uv4si (__arm_vuninitializedq_u32 (), __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n_s16_f16 (float16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_from_f_sv8hi (__arm_vuninitializedq_s16 (), __a, __imm6, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n_s32_f32 (float32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_from_f_sv4si (__arm_vuninitializedq_s32 (), __a, __imm6, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n_u16_f16 (float16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_from_f_uv8hi (__arm_vuninitializedq_u16 (), __a, __imm6, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n_u32_f32 (float32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __builtin_mve_vcvtq_m_n_from_f_uv4si (__arm_vuninitializedq_u32 (), __a, __imm6, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_fv8hf (__arm_vuninitializedq_f16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vbicq_m_fv4sf (__arm_vuninitializedq_f32 (), __a, __b, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x_f16 (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_fv8hf (__arm_vuninitializedq_f16 (), __a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x_f32 (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __builtin_mve_vornq_m_fv4sf (__arm_vuninitializedq_f32 (), __a, __b, __p);
-}
-
__extension__ extern __inline float16x8x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld4q_f16 (float16_t const * __addr)
@@ -4833,13 +1871,6 @@ __arm_vld2q_f16 (float16_t const * __addr)
return __rv.__i;
}
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_f16 (float16_t const *__base, mve_pred16_t __p)
-{
- return __arm_vldrhq_z_f16 (__base, __p);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q_f16 (float16_t * __addr, float16x8x2_t __value)
@@ -4849,13 +1880,6 @@ __arm_vst2q_f16 (float16_t * __addr, float16x8x2_t __value)
__builtin_mve_vst2qv8hf (__addr, __rv.__o);
}
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_f16 (float16_t * __addr, float16x8_t __value, mve_pred16_t __p)
-{
- return __arm_vstrhq_p_f16 (__addr, __value, __p);
-}
-
__extension__ extern __inline float32x4x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld4q_f32 (float32_t const * __addr)
@@ -4874,13 +1898,6 @@ __arm_vld2q_f32 (float32_t const * __addr)
return __rv.__i;
}
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z_f32 (float32_t const *__base, mve_pred16_t __p)
-{
- return __arm_vldrwq_z_f32 (__base, __p);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q_f32 (float32_t * __addr, float32x4x2_t __value)
@@ -4890,13 +1907,6 @@ __arm_vst2q_f32 (float32_t * __addr, float32x4x2_t __value)
__builtin_mve_vst2qv4sf (__addr, __rv.__o);
}
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p_f32 (float32_t * __addr, float32x4_t __value, mve_pred16_t __p)
-{
- return __arm_vstrwq_p_f32 (__addr, __value, __p);
-}
-
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vsetq_lane_f16 (float16_t __a, float16x8_t __b, const int __idx)
@@ -4975,272 +1985,6 @@ __arm_vst4q (uint32_t * __addr, uint32x4x4_t __value)
__arm_vst4q_u32 (__addr, __value);
}
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vornq_u8 (__a, __b);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (uint8x16_t __a, uint8x16_t __b)
-{
- return __arm_vbicq_u8 (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vornq_s8 (__a, __b);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (int8x16_t __a, int8x16_t __b)
-{
- return __arm_vbicq_s8 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vornq_u16 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (uint16x8_t __a, uint16x8_t __b)
-{
- return __arm_vbicq_u16 (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vornq_s16 (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (int16x8_t __a, int16x8_t __b)
-{
- return __arm_vbicq_s16 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vornq_u32 (__a, __b);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (uint32x4_t __a, uint32x4_t __b)
-{
- return __arm_vbicq_u32 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vornq_s32 (__a, __b);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (int32x4_t __a, int32x4_t __b)
-{
- return __arm_vbicq_s32 (__a, __b);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (uint16x8_t __a, const int __imm)
-{
- return __arm_vbicq_n_u16 (__a, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (int16x8_t __a, const int __imm)
-{
- return __arm_vbicq_n_s16 (__a, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (uint32x4_t __a, const int __imm)
-{
- return __arm_vbicq_n_u32 (__a, __imm);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (int32x4_t __a, const int __imm)
-{
- return __arm_vbicq_n_s32 (__a, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_n (int16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vbicq_m_n_s16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_n (int32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vbicq_m_n_s32 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_n (uint16x8_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vbicq_m_n_u16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m_n (uint32x4_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vbicq_m_n_u32 (__a, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (int8x16_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_s8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (uint8x16_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_u8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (int16x8_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_s16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (uint16x8_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (int32x4_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_s32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq (uint32x4_t __a, uint32_t * __b, const int __imm)
-{
- return __arm_vshlcq_u32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_m_u8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_m_u32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_m_u16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m (int8x16_t __inactive, int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_m_s8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_m_s32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m (int16x8_t __inactive, int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_m_s16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m (uint8x16_t __inactive, uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_m_u8 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_m_u32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m (uint16x8_t __inactive, uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_m_u16 (__inactive, __a, __b, __p);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrbq_scatter_offset (int8_t * __base, uint8x16_t __offset, int8x16_t __value)
@@ -5285,48 +2029,6 @@ __arm_vstrbq_scatter_offset (uint8_t * __base, uint16x8_t __offset, uint16x8_t _
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (int8_t * __addr, int8x16_t __value)
-{
- __arm_vstrbq_s8 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (int8_t * __addr, int32x4_t __value)
-{
- __arm_vstrbq_s32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (int8_t * __addr, int16x8_t __value)
-{
- __arm_vstrbq_s16 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (uint8_t * __addr, uint8x16_t __value)
-{
- __arm_vstrbq_u8 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (uint8_t * __addr, uint32x4_t __value)
-{
- __arm_vstrbq_u32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq (uint8_t * __addr, uint16x8_t __value)
-{
- __arm_vstrbq_u16 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrwq_scatter_base (uint32x4_t __addr, const int __offset, int32x4_t __value)
{
__arm_vstrwq_scatter_base_s32 (__addr, __offset, __value);
@@ -5383,48 +2085,6 @@ __arm_vldrbq_gather_offset (int8_t const * __base, uint32x4_t __offset)
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (int8_t * __addr, int8x16_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_s8 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (int8_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_s32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (int8_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_s16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (uint8_t * __addr, uint8x16_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_u8 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (uint8_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_u32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrbq_p (uint8_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
- __arm_vstrbq_p_u16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrbq_scatter_offset_p (int8_t * __base, uint8x16_t __offset, int8x16_t __value, mve_pred16_t __p)
{
__arm_vstrbq_scatter_offset_p_s8 (__base, __offset, __value, __p);
@@ -5859,90 +2519,6 @@ __arm_vstrhq_scatter_shifted_offset_p (uint16_t * __base, uint16x8_t __offset, u
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq (int16_t * __addr, int32x4_t __value)
-{
- __arm_vstrhq_s32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq (int16_t * __addr, int16x8_t __value)
-{
- __arm_vstrhq_s16 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq (uint16_t * __addr, uint32x4_t __value)
-{
- __arm_vstrhq_u32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq (uint16_t * __addr, uint16x8_t __value)
-{
- __arm_vstrhq_u16 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p (int16_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrhq_p_s32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p (int16_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
- __arm_vstrhq_p_s16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p (uint16_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrhq_p_u32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p (uint16_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
- __arm_vstrhq_p_u16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq (int32_t * __addr, int32x4_t __value)
-{
- __arm_vstrwq_s32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq (uint32_t * __addr, uint32x4_t __value)
-{
- __arm_vstrwq_u32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p (int32_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_p_s32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p (uint32_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_p_u32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrdq_scatter_base_p (uint64x2_t __addr, const int __offset, int64x2_t __value, mve_pred16_t __p)
{
__arm_vstrdq_scatter_base_p_s64 (__addr, __offset, __value, __p);
@@ -6081,342 +2657,6 @@ __arm_vstrwq_scatter_shifted_offset (uint32_t * __base, uint32x4_t __offset, uin
__arm_vstrwq_scatter_shifted_offset_u32 (__base, __offset, __value);
}
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m (uint8x16_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_m_n_u8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m (uint32x4_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_m_n_u32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m (uint16x8_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_m_n_u16 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m (uint8x16_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_m_wb_u8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m (uint16x8_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_m_wb_u16 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_m (uint32x4_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_m_wb_u32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_u8 (uint32_t __a, const int __imm)
-{
- return __arm_vddupq_n_u8 (__a, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_u32 (uint32_t __a, const int __imm)
-{
- return __arm_vddupq_n_u32 (__a, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_u16 (uint32_t __a, const int __imm)
-{
- return __arm_vddupq_n_u16 (__a, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m (uint8x16_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_m_n_u8 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m (uint32x4_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_m_n_u32 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m (uint16x8_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_m_n_u16 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m (uint8x16_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_m_wb_u8 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m (uint32x4_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_m_wb_u32 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_m (uint16x8_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_m_wb_u16 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_u8 (uint32_t __a, uint32_t __b, const int __imm)
-{
- return __arm_vdwdupq_n_u8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_u32 (uint32_t __a, uint32_t __b, const int __imm)
-{
- return __arm_vdwdupq_n_u32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_u16 (uint32_t __a, uint32_t __b, const int __imm)
-{
- return __arm_vdwdupq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_u8 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- return __arm_vdwdupq_wb_u8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_u32 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- return __arm_vdwdupq_wb_u32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_u16 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- return __arm_vdwdupq_wb_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m (uint8x16_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_m_n_u8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m (uint32x4_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_m_n_u32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m (uint16x8_t __inactive, uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_m_n_u16 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_u8 (uint32_t __a, const int __imm)
-{
- return __arm_vidupq_n_u8 (__a, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m (uint8x16_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_m_wb_u8 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m (uint16x8_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_m_wb_u16 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_m (uint32x4_t __inactive, uint32_t * __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_m_wb_u32 (__inactive, __a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_u32 (uint32_t __a, const int __imm)
-{
- return __arm_vidupq_n_u32 (__a, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_u16 (uint32_t __a, const int __imm)
-{
- return __arm_vidupq_n_u16 (__a, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_u8 (uint32_t * __a, const int __imm)
-{
- return __arm_vidupq_wb_u8 (__a, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_u16 (uint32_t * __a, const int __imm)
-{
- return __arm_vidupq_wb_u16 (__a, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_u32 (uint32_t * __a, const int __imm)
-{
- return __arm_vidupq_wb_u32 (__a, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_u8 (uint32_t * __a, const int __imm)
-{
- return __arm_vddupq_wb_u8 (__a, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_u16 (uint32_t * __a, const int __imm)
-{
- return __arm_vddupq_wb_u16 (__a, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_u32 (uint32_t * __a, const int __imm)
-{
- return __arm_vddupq_wb_u32 (__a, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m (uint8x16_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_m_n_u8 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m (uint32x4_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_m_n_u32 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m (uint16x8_t __inactive, uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_m_n_u16 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m (uint8x16_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_m_wb_u8 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m (uint32x4_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_m_wb_u32 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_m (uint16x8_t __inactive, uint32_t * __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_m_wb_u16 (__inactive, __a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_u8 (uint32_t __a, uint32_t __b, const int __imm)
-{
- return __arm_viwdupq_n_u8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_u32 (uint32_t __a, uint32_t __b, const int __imm)
-{
- return __arm_viwdupq_n_u32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_u16 (uint32_t __a, uint32_t __b, const int __imm)
-{
- return __arm_viwdupq_n_u16 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_u8 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- return __arm_viwdupq_wb_u8 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_u32 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- return __arm_viwdupq_wb_u32 (__a, __b, __imm);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_u16 (uint32_t * __a, uint32_t __b, const int __imm)
-{
- return __arm_viwdupq_wb_u16 (__a, __b, __imm);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrdq_scatter_base_wb (uint64x2_t * __addr, const int __offset, int64x2_t __value)
@@ -6473,384 +2713,6 @@ __arm_vstrwq_scatter_base_wb (uint32x4_t * __addr, const int __offset, uint32x4_
__arm_vstrwq_scatter_base_wb_u32 (__addr, __offset, __value);
}
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_u8 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_x_n_u8 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_u16 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_x_n_u16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_u32 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_x_n_u32 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_u8 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_x_wb_u8 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_u16 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_x_wb_u16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vddupq_x_u32 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vddupq_x_wb_u32 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_u8 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_x_n_u8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_u16 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_x_n_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_u32 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_x_n_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_u8 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_x_wb_u8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_u16 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_x_wb_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vdwdupq_x_u32 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vdwdupq_x_wb_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_u8 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_x_n_u8 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_u16 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_x_n_u16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_u32 (uint32_t __a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_x_n_u32 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_u8 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_x_wb_u8 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_u16 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_x_wb_u16 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vidupq_x_u32 (uint32_t *__a, const int __imm, mve_pred16_t __p)
-{
- return __arm_vidupq_x_wb_u32 (__a, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_u8 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_x_n_u8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_u16 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_x_n_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_u32 (uint32_t __a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_x_n_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_u8 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_x_wb_u8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_u16 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_x_wb_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_viwdupq_x_u32 (uint32_t *__a, uint32_t __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_viwdupq_x_wb_u32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_x_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_x_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_x_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_x_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_x_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_x_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x (int8x16_t __a, int8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_x_s8 (__a, __b, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x (int16x8_t __a, int16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_x_s16 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x (int32x4_t __a, int32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_x_s32 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x (uint8x16_t __a, uint8x16_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_x_u8 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x (uint16x8_t __a, uint16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_x_u16 (__a, __b, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x (uint32x4_t __a, uint32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_x_u32 (__a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadciq (int32x4_t __a, int32x4_t __b, unsigned * __carry_out)
-{
- return __arm_vadciq_s32 (__a, __b, __carry_out);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadciq (uint32x4_t __a, uint32x4_t __b, unsigned * __carry_out)
-{
- return __arm_vadciq_u32 (__a, __b, __carry_out);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadciq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, unsigned * __carry_out, mve_pred16_t __p)
-{
- return __arm_vadciq_m_s32 (__inactive, __a, __b, __carry_out, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadciq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, unsigned * __carry_out, mve_pred16_t __p)
-{
- return __arm_vadciq_m_u32 (__inactive, __a, __b, __carry_out, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadcq (int32x4_t __a, int32x4_t __b, unsigned * __carry)
-{
- return __arm_vadcq_s32 (__a, __b, __carry);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadcq (uint32x4_t __a, uint32x4_t __b, unsigned * __carry)
-{
- return __arm_vadcq_u32 (__a, __b, __carry);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadcq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, unsigned * __carry, mve_pred16_t __p)
-{
- return __arm_vadcq_m_s32 (__inactive, __a, __b, __carry, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vadcq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, unsigned * __carry, mve_pred16_t __p)
-{
- return __arm_vadcq_m_u32 (__inactive, __a, __b, __carry, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbciq (int32x4_t __a, int32x4_t __b, unsigned * __carry_out)
-{
- return __arm_vsbciq_s32 (__a, __b, __carry_out);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbciq (uint32x4_t __a, uint32x4_t __b, unsigned * __carry_out)
-{
- return __arm_vsbciq_u32 (__a, __b, __carry_out);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbciq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, unsigned * __carry_out, mve_pred16_t __p)
-{
- return __arm_vsbciq_m_s32 (__inactive, __a, __b, __carry_out, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbciq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, unsigned * __carry_out, mve_pred16_t __p)
-{
- return __arm_vsbciq_m_u32 (__inactive, __a, __b, __carry_out, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbcq (int32x4_t __a, int32x4_t __b, unsigned * __carry)
-{
- return __arm_vsbcq_s32 (__a, __b, __carry);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbcq (uint32x4_t __a, uint32x4_t __b, unsigned * __carry)
-{
- return __arm_vsbcq_u32 (__a, __b, __carry);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbcq_m (int32x4_t __inactive, int32x4_t __a, int32x4_t __b, unsigned * __carry, mve_pred16_t __p)
-{
- return __arm_vsbcq_m_s32 (__inactive, __a, __b, __carry, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vsbcq_m (uint32x4_t __inactive, uint32x4_t __a, uint32x4_t __b, unsigned * __carry, mve_pred16_t __p)
-{
- return __arm_vsbcq_m_u32 (__inactive, __a, __b, __carry, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (uint8_t * __addr, uint8x16_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_u8 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (int8_t * __addr, int8x16_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_s8 (__addr, __value, __p);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q (int8_t * __addr, int8x16x2_t __value)
@@ -6865,20 +2727,6 @@ __arm_vst2q (uint8_t * __addr, uint8x16x2_t __value)
__arm_vst2q_u8 (__addr, __value);
}
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (uint8_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_u8 (__base, __p);
-}
-
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (int8_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_s8 (__base, __p);
-}
-
__extension__ extern __inline int8x16x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld2q (int8_t const * __addr)
@@ -6909,20 +2757,6 @@ __arm_vld4q (uint8_t const * __addr)
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (uint16_t * __addr, uint16x8_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_u16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (int16_t * __addr, int16x8_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_s16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q (int16_t * __addr, int16x8x2_t __value)
{
__arm_vst2q_s16 (__addr, __value);
@@ -6935,20 +2769,6 @@ __arm_vst2q (uint16_t * __addr, uint16x8x2_t __value)
__arm_vst2q_u16 (__addr, __value);
}
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (uint16_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_u16 (__base, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (int16_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_s16 (__base, __p);
-}
-
__extension__ extern __inline int16x8x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld2q (int16_t const * __addr)
@@ -6979,20 +2799,6 @@ __arm_vld4q (uint16_t const * __addr)
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (uint32_t * __addr, uint32x4_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_u32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (int32_t * __addr, int32x4_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_s32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q (int32_t * __addr, int32x4x2_t __value)
{
__arm_vst2q_s32 (__addr, __value);
@@ -7005,20 +2811,6 @@ __arm_vst2q (uint32_t * __addr, uint32x4x2_t __value)
__arm_vst2q_u32 (__addr, __value);
}
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (uint32_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_u32 (__base, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (int32_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_s32 (__base, __p);
-}
-
__extension__ extern __inline int32x4x2_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld2q (int32_t const * __addr)
@@ -7159,48 +2951,6 @@ __arm_vgetq_lane (uint64x2_t __a, const int __idx)
return __arm_vgetq_lane_u64 (__a, __idx);
}
-__extension__ extern __inline int8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (int8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_s8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint8x16_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (uint8x16_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_u8 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (int16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_s16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (uint16x8_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_u16 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (int32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_s32 (__a, __b, __imm, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vshlcq_m (uint32x4_t __a, uint32_t * __b, const int __imm, mve_pred16_t __p)
-{
- return __arm_vshlcq_m_u32 (__a, __b, __imm, __p);
-}
-
#if (__ARM_FEATURE_MVE & 2) /* MVE Floating point. */
__extension__ extern __inline void
@@ -7217,384 +2967,6 @@ __arm_vst4q (float32_t * __addr, float32x4x4_t __value)
__arm_vst4q_f32 (__addr, __value);
}
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_f32 (float16x8_t __a)
-{
- return __arm_vcvttq_f32_f16 (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_f32 (float16x8_t __a)
-{
- return __arm_vcvtbq_f32_f16 (__a);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq (int16x8_t __a)
-{
- return __arm_vcvtq_f16_s16 (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq (int32x4_t __a)
-{
- return __arm_vcvtq_f32_s32 (__a);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq (uint16x8_t __a)
-{
- return __arm_vcvtq_f16_u16 (__a);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq (uint32x4_t __a)
-{
- return __arm_vcvtq_f32_u32 (__a);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n (int16x8_t __a, const int __imm6)
-{
- return __arm_vcvtq_n_f16_s16 (__a, __imm6);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n (int32x4_t __a, const int __imm6)
-{
- return __arm_vcvtq_n_f32_s32 (__a, __imm6);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n (uint16x8_t __a, const int __imm6)
-{
- return __arm_vcvtq_n_f16_u16 (__a, __imm6);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_n (uint32x4_t __a, const int __imm6)
-{
- return __arm_vcvtq_n_f32_u32 (__a, __imm6);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vornq_f16 (__a, __b);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (float16x8_t __a, float16x8_t __b)
-{
- return __arm_vbicq_f16 (__a, __b);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq (float32x4_t __a, float32x4_t __b)
-{
- return __arm_vornq_f32 (__a, __b);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq (float32x4_t __a, float32x4_t __b)
-{
- return __arm_vbicq_f32 (__a, __b);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtaq_m_s16_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_m (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtaq_m_u16_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_m (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtaq_m_s32_f32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtaq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtaq_m_u32_f32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m (float16x8_t __inactive, int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_f16_s16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m (float16x8_t __inactive, uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_f16_u16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m (float32x4_t __inactive, int32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_f32_s32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m (float32x4_t __inactive, uint32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_f32_u32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_m (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcvtbq_m_f16_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtbq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtbq_m_f32_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_m (float16x8_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vcvttq_m_f16_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvttq_m (float32x4_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvttq_m_f32_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtmq_m_s16_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtnq_m_s16_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtpq_m_s16_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m (int16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_s16_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_m (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtmq_m_u16_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_m (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtnq_m_u16_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_m (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtpq_m_u16_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m (uint16x8_t __inactive, float16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_u16_f16 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_m (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtmq_m_s32_f32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_m (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtnq_m_s32_f32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_m (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtpq_m_s32_f32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m (int32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_s32_f32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtmq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtmq_m_u32_f32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtnq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtnq_m_u32_f32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtpq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtpq_m_u32_f32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m (uint32x4_t __inactive, float32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_u32_f32 (__inactive, __a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n (float16x8_t __inactive, uint16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_n_f16_u16 (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n (float16x8_t __inactive, int16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_n_f16_s16 (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n (float32x4_t __inactive, uint32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_n_f32_u32 (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n (float32x4_t __inactive, int32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_n_f32_s32 (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_m_f32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_m (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_m_f16 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline int32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n (int32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_n_s32_f32 (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline int16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n (int16x8_t __inactive, float16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_n_s16_f16 (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline uint32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n (uint32x4_t __inactive, float32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_n_u32_f32 (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline uint16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_m_n (uint16x8_t __inactive, float16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_m_n_u16_f16 (__inactive, __a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m (float32x4_t __inactive, float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_m_f32 (__inactive, __a, __b, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_m (float16x8_t __inactive, float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_m_f16 (__inactive, __a, __b, __p);
-}
-
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vldrhq_gather_offset (float16_t const * __base, uint16x8_t __offset)
@@ -7653,34 +3025,6 @@ __arm_vldrwq_gather_shifted_offset_z (float32_t const * __base, uint32x4_t __off
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq_p (float32_t * __addr, float32x4_t __value, mve_pred16_t __p)
-{
- __arm_vstrwq_p_f32 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrwq (float32_t * __addr, float32x4_t __value)
-{
- __arm_vstrwq_f32 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq (float16_t * __addr, float16x8_t __value)
-{
- __arm_vstrhq_f16 (__addr, __value);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vstrhq_p (float16_t * __addr, float16x8_t __value, mve_pred16_t __p)
-{
- __arm_vstrhq_p_f16 (__addr, __value, __p);
-}
-
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vstrhq_scatter_offset (float16_t * __base, uint16x8_t __offset, float16x8_t __value)
{
__arm_vstrhq_scatter_offset_f16 (__base, __offset, __value);
@@ -7763,90 +3107,6 @@ __arm_vstrwq_scatter_base_wb_p (uint32x4_t * __addr, const int __offset, float32
__arm_vstrwq_scatter_base_wb_p_f32 (__addr, __offset, __value, __p);
}
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x (uint16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_x_f16_u16 (__a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x (int16x8_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_x_f16_s16 (__a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x (int32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_x_f32_s32 (__a, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x (uint32x4_t __a, mve_pred16_t __p)
-{
- return __arm_vcvtq_x_f32_u32 (__a, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n (int16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_x_n_f16_s16 (__a, __imm6, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n (uint16x8_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_x_n_f16_u16 (__a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n (int32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_x_n_f32_s32 (__a, __imm6, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vcvtq_x_n (uint32x4_t __a, const int __imm6, mve_pred16_t __p)
-{
- return __arm_vcvtq_x_n_f32_u32 (__a, __imm6, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_x_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vbicq_x (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vbicq_x_f32 (__a, __b, __p);
-}
-
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x (float16x8_t __a, float16x8_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_x_f16 (__a, __b, __p);
-}
-
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vornq_x (float32x4_t __a, float32x4_t __b, mve_pred16_t __p)
-{
- return __arm_vornq_x_f32 (__a, __b, __p);
-}
-
__extension__ extern __inline float16x8x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld4q (float16_t const * __addr)
@@ -7861,13 +3121,6 @@ __arm_vld2q (float16_t const * __addr)
return __arm_vld2q_f16 (__addr);
}
-__extension__ extern __inline float16x8_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (float16_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_f16 (__base, __p);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q (float16_t * __addr, float16x8x2_t __value)
@@ -7875,13 +3128,6 @@ __arm_vst2q (float16_t * __addr, float16x8x2_t __value)
__arm_vst2q_f16 (__addr, __value);
}
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (float16_t * __addr, float16x8_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_f16 (__addr, __value, __p);
-}
-
__extension__ extern __inline float32x4x4_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vld4q (float32_t const * __addr)
@@ -7896,13 +3142,6 @@ __arm_vld2q (float32_t const * __addr)
return __arm_vld2q_f32 (__addr);
}
-__extension__ extern __inline float32x4_t
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vld1q_z (float32_t const *__base, mve_pred16_t __p)
-{
- return __arm_vld1q_z_f32 (__base, __p);
-}
-
__extension__ extern __inline void
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vst2q (float32_t * __addr, float32x4x2_t __value)
@@ -7910,13 +3149,6 @@ __arm_vst2q (float32_t * __addr, float32x4x2_t __value)
__arm_vst2q_f32 (__addr, __value);
}
-__extension__ extern __inline void
-__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
-__arm_vst1q_p (float32_t * __addr, float32x4_t __value, mve_pred16_t __p)
-{
- __arm_vst1q_p_f32 (__addr, __value, __p);
-}
-
__extension__ extern __inline float16x8_t
__attribute__ ((__always_inline__, __gnu_inline__, __artificial__))
__arm_vsetq_lane (float16_t __a, float16x8_t __b, const int __idx)
@@ -8268,177 +3500,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x4_t]: __arm_vst4q_f16 (__ARM_mve_coerce_f16_ptr(__p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x4_t)), \
int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x4_t]: __arm_vst4q_f32 (__ARM_mve_coerce_f32_ptr(__p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x4_t)));})
-#define __arm_vcvtbq_f32(p0) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvtbq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));})
-
-#define __arm_vcvttq_f32(p0) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_float16x8_t]: __arm_vcvttq_f32_f16 (__ARM_mve_coerce(__p0, float16x8_t)));})
-
-#define __arm_vcvtq(p0) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vcvtq_f16_s16 (__ARM_mve_coerce(__p0, int16x8_t)), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vcvtq_f32_s32 (__ARM_mve_coerce(__p0, int32x4_t)), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vcvtq_f16_u16 (__ARM_mve_coerce(__p0, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vcvtq_f32_u32 (__ARM_mve_coerce(__p0, uint32x4_t)));})
-
-#define __arm_vcvtq_n(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vcvtq_n_f16_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vcvtq_n_f32_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vcvtq_n_f16_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vcvtq_n_f32_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1));})
-
-#define __arm_vbicq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vbicq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce_i_scalar (__p1, int)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vbicq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce_i_scalar (__p1, int)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vbicq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce_i_scalar (__p1, int)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vbicq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce_i_scalar (__p1, int)), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbicq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbicq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbicq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbicq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbicq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbicq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));})
-
-#define __arm_vornq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vornq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vornq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vornq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vornq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vornq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vornq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vornq_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t)), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vornq_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t)));})
-
-#define __arm_vbicq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vbicq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vbicq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
-#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
- int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
-#define __arm_vcvtaq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtaq_m_s16_f16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtaq_m_s32_f32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtaq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtaq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
-#define __arm_vcvtq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcvtq_m_f16_s16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcvtq_m_f32_s32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcvtq_m_f16_u16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcvtq_m_f32_u32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtq_m_s16_f16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtq_m_s32_f32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
-#define __arm_vcvtq_m_n(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtq_m_n_s16_f16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2, p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtq_m_n_s32_f32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2, p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtq_m_n_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2, p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtq_m_n_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2, p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_int16x8_t]: __arm_vcvtq_m_n_f16_s16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_int32x4_t]: __arm_vcvtq_m_n_f32_s32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vcvtq_m_n_f16_u16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vcvtq_m_n_f32_u32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
-#define __arm_vcvtbq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float16x8_t]: __arm_vcvtbq_m_f32_f16 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float32x4_t]: __arm_vcvtbq_m_f16_f32 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
-#define __arm_vcvttq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float16x8_t]: __arm_vcvttq_m_f32_f16 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float32x4_t]: __arm_vcvttq_m_f16_f32 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
-#define __arm_vcvtmq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtmq_m_s16_f16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtmq_m_s32_f32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtmq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtmq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
-#define __arm_vcvtnq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtnq_m_s16_f16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtnq_m_s32_f32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtnq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtnq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
-#define __arm_vcvtpq_m(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtpq_m_s16_f16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtpq_m_s32_f32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vcvtpq_m_u16_f16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_float32x4_t]: __arm_vcvtpq_m_u32_f32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
-#define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbicq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbicq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbicq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbicq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbicq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbicq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
-#define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vornq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vornq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vornq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vornq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vornq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vornq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vornq_m_f16 (__ARM_mve_coerce(__p0, float16x8_t), __ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vornq_m_f32 (__ARM_mve_coerce(__p0, float32x4_t), __ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
-#define __arm_vld1q_z(p0,p1) ( \
- _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), p1), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), p1), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), p1), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), p1), \
- int (*)[__ARM_mve_type_float16_t_ptr]: __arm_vld1q_z_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), p1), \
- int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vld1q_z_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), p1)))
-
#define __arm_vld2q(p0) ( \
_Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *)), \
@@ -8517,17 +3578,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vldrwq_gather_shifted_offset_z_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), p1, p2), \
int (*)[__ARM_mve_type_float32_t_ptr]: __arm_vldrwq_gather_shifted_offset_z_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), p1, p2)))
-#define __arm_vst1q_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vst1q_p_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vst1q_p_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vst1q_p_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vst1q_p_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vst1q_p_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vst1q_p_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
- int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8_t]: __arm_vst1q_p_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, float16x8_t), p2), \
- int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vst1q_p_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
#define __arm_vst2q(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16x2_t]: __arm_vst2q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16x2_t)), \
@@ -8539,22 +3589,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8x2_t]: __arm_vst2q_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, float16x8x2_t)), \
int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4x2_t]: __arm_vst2q_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __ARM_mve_coerce(__p1, float32x4x2_t)));})
-#define __arm_vstrhq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrhq_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrhq_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \
- int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8_t]: __arm_vstrhq_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, float16x8_t)));})
-
-#define __arm_vstrhq_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrhq_p_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrhq_p_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_p_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_p_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
- int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_float16x8_t]: __arm_vstrhq_p_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, float16x8_t), p2));})
-
#define __arm_vstrhq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -8591,18 +3625,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)), \
int (*)[__ARM_mve_type_float16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_float16x8_t]: __arm_vstrhq_scatter_shifted_offset_f16 (__ARM_mve_coerce_f16_ptr(p0, float16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, float16x8_t)));})
-#define __arm_vstrwq_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_p_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_p_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2), \
- int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_p_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __ARM_mve_coerce(__p1, float32x4_t), p2));})
-
-#define __arm_vstrwq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4_t)), \
- int (*)[__ARM_mve_type_float32_t_ptr][__ARM_mve_type_float32x4_t]: __arm_vstrwq_f32 (__ARM_mve_coerce_f32_ptr(p0, float32_t *), __ARM_mve_coerce(__p1, float32x4_t)));})
-
#define __arm_vstrhq_scatter_offset(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -8718,44 +3740,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint32x4_t]: __arm_vstrwq_scatter_base_wb_p_u32 (p0, p1, __ARM_mve_coerce(__p2, uint32x4_t), p3), \
int (*)[__ARM_mve_type_float32x4_t]: __arm_vstrwq_scatter_base_wb_p_f32 (p0, p1, __ARM_mve_coerce(__p2, float32x4_t), p3));})
-#define __arm_vbicq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbicq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbicq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbicq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbicq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vbicq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vbicq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
-#define __arm_vcvtq_x(p1,p2) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vcvtq_x_f16_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vcvtq_x_f32_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vcvtq_x_f16_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vcvtq_x_f32_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vcvtq_x_n(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vcvtq_x_n_f16_s16 (__ARM_mve_coerce(__p1, int16x8_t), p2, p3), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vcvtq_x_n_f32_s32 (__ARM_mve_coerce(__p1, int32x4_t), p2, p3), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vcvtq_x_n_f16_u16 (__ARM_mve_coerce(__p1, uint16x8_t), p2, p3), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vcvtq_x_n_f32_u32 (__ARM_mve_coerce(__p1, uint32x4_t), p2, p3));})
-
-#define __arm_vornq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vornq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vornq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vornq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vornq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vornq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vornq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3), \
- int (*)[__ARM_mve_type_float16x8_t][__ARM_mve_type_float16x8_t]: __arm_vornq_x_f16 (__ARM_mve_coerce(__p1, float16x8_t), __ARM_mve_coerce(__p2, float16x8_t), p3), \
- int (*)[__ARM_mve_type_float32x4_t][__ARM_mve_type_float32x4_t]: __arm_vornq_x_f32 (__ARM_mve_coerce(__p1, float32x4_t), __ARM_mve_coerce(__p2, float32x4_t), p3));})
-
#define __arm_vgetq_lane(p0,p1) ({ __typeof(p0) __p0 = (p0); \
_Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
int (*)[__ARM_mve_type_int8x16_t]: __arm_vgetq_lane_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1), \
@@ -8804,68 +3788,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x4_t]: __arm_vst4q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x4_t)), \
int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x4_t]: __arm_vst4q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x4_t)));})
-#define __arm_vornq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vornq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vornq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vornq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vornq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vornq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vornq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vbicq(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int_n]: __arm_vbicq_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce_i_scalar (__p1, int)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int_n]: __arm_vbicq_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce_i_scalar (__p1, int)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vbicq_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce_i_scalar (__p1, int)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vbicq_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce_i_scalar (__p1, int)), \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbicq_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbicq_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbicq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbicq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vshlcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2), \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
- int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
-#define __arm_vbicq_m_n(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vbicq_m_n_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vbicq_m_n_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2));})
-
-#define __arm_vbicq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbicq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbicq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbicq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbicq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define __arm_vornq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vornq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), __ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vornq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), __ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vornq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vornq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vornq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vornq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
#define __arm_vstrwq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int32x4_t]: __arm_vstrwq_scatter_base_s32(p0, p1, __ARM_mve_coerce(__p2, int32x4_t)), \
@@ -8933,15 +3855,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vldrwq_gather_shifted_offset_z_s32 (__ARM_mve_coerce_s32_ptr(__p0, int32_t *), p1, p2), \
int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vldrwq_gather_shifted_offset_z_u32 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, p2));})
-#define __arm_vst1q_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vst1q_p_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vst1q_p_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vst1q_p_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vst1q_p_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vst1q_p_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vst1q_p_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
#define __arm_vst2q(p0,p1) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16x2_t]: __arm_vst2q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16x2_t)), \
@@ -8951,20 +3864,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8x2_t]: __arm_vst2q_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8x2_t)), \
int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4x2_t]: __arm_vst2q_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4x2_t)));})
-#define __arm_vstrhq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrhq_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrhq_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vstrhq_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrhq_p_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int16_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrhq_p_s32 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_p_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_p_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
#define __arm_vstrhq_scatter_offset_p(p0,p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
__typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
@@ -8997,17 +3896,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vstrhq_scatter_shifted_offset_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t)), \
int (*)[__ARM_mve_type_uint16_t_ptr][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vstrhq_scatter_shifted_offset_u32 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t)));})
-
-#define __arm_vstrwq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vstrwq_p(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int32_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrwq_p_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint32_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrwq_p_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
#define __arm_vstrdq_scatter_base_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
@@ -9085,34 +3973,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint32x4_t]: __arm_vuninitializedq_u32 (), \
int (*)[__ARM_mve_type_uint64x2_t]: __arm_vuninitializedq_u64 ());})
-#define __arm_vornq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vornq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vornq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vornq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vornq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vornq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vornq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define __arm_vbicq_x(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int8x16_t][__ARM_mve_type_int8x16_t]: __arm_vbicq_x_s8 (__ARM_mve_coerce(__p1, int8x16_t), __ARM_mve_coerce(__p2, int8x16_t), p3), \
- int (*)[__ARM_mve_type_int16x8_t][__ARM_mve_type_int16x8_t]: __arm_vbicq_x_s16 (__ARM_mve_coerce(__p1, int16x8_t), __ARM_mve_coerce(__p2, int16x8_t), p3), \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vbicq_x_s32 (__ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint8x16_t]: __arm_vbicq_x_u8 (__ARM_mve_coerce(__p1, uint8x16_t), __ARM_mve_coerce(__p2, uint8x16_t), p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint16x8_t]: __arm_vbicq_x_u16 (__ARM_mve_coerce(__p1, uint16x8_t), __ARM_mve_coerce(__p2, uint16x8_t), p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vbicq_x_u32 (__ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3));})
-
-#define __arm_vld1q_z(p0,p1) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld1q_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), p1), \
- int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld1q_z_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *), p1), \
- int (*)[__ARM_mve_type_int32_t_ptr]: __arm_vld1q_z_s32 (__ARM_mve_coerce_s32_ptr(p0, int32_t *), p1), \
- int (*)[__ARM_mve_type_uint8_t_ptr]: __arm_vld1q_z_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), p1), \
- int (*)[__ARM_mve_type_uint16_t_ptr]: __arm_vld1q_z_u16 (__ARM_mve_coerce_u16_ptr(p0, uint16_t *), p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vld1q_z_u32 (__ARM_mve_coerce_u32_ptr(p0, uint32_t *), p1)))
-
#define __arm_vld2q(p0) ( _Generic( (int (*)[__ARM_mve_typeid(p0)])0, \
int (*)[__ARM_mve_type_int8_t_ptr]: __arm_vld2q_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *)), \
int (*)[__ARM_mve_type_int16_t_ptr]: __arm_vld2q_s16 (__ARM_mve_coerce_s16_ptr(p0, int16_t *)), \
@@ -9156,73 +4016,6 @@ extern void *__ARM_undef;
#endif /* MVE Integer. */
-
-#define __arm_vdwdupq_x_u8(p1,p2,p3,p4) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vdwdupq_x_n_u8 ((uint32_t) __p1, p2, p3, p4), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vdwdupq_x_wb_u8 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4));})
-
-#define __arm_vdwdupq_x_u16(p1,p2,p3,p4) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vdwdupq_x_n_u16 ((uint32_t) __p1, p2, p3, p4), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vdwdupq_x_wb_u16 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4));})
-
-#define __arm_vdwdupq_x_u32(p1,p2,p3,p4) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vdwdupq_x_n_u32 ((uint32_t) __p1, p2, p3, p4), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vdwdupq_x_wb_u32 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4));})
-
-#define __arm_viwdupq_x_u8(p1,p2,p3,p4) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_x_n_u8 ((uint32_t) __p1, p2, p3, p4), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_x_wb_u8 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4));})
-
-#define __arm_viwdupq_x_u16(p1,p2,p3,p4) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_x_n_u16 ((uint32_t) __p1, p2, p3, p4), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_x_wb_u16 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4));})
-
-#define __arm_viwdupq_x_u32(p1,p2,p3,p4) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_x_n_u32 ((uint32_t) __p1, p2, p3, p4), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_x_wb_u32 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4));})
-
-#define __arm_vidupq_x_u8(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vidupq_x_n_u8 ((uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vidupq_x_wb_u8 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3));})
-
-#define __arm_vddupq_x_u8(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vddupq_x_n_u8 ((uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vddupq_x_wb_u8 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3));})
-
-#define __arm_vidupq_x_u16(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vidupq_x_n_u16 ((uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vidupq_x_wb_u16 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3));})
-
-#define __arm_vddupq_x_u16(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vddupq_x_n_u16 ((uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vddupq_x_wb_u16 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3));})
-
-#define __arm_vidupq_x_u32(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vidupq_x_n_u32 ((uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vidupq_x_wb_u32 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3));})
-
-#define __arm_vddupq_x_u32(p1,p2,p3) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vddupq_x_n_u32 ((uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vddupq_x_wb_u32 (__ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3));})
-
-#define __arm_vadciq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vadciq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vadciq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
#define __arm_vstrdq_scatter_base_wb_p(p0,p1,p2,p3) ({ __typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_wb_p_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t), p3), \
@@ -9249,58 +4042,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_int64_t_ptr]: __arm_vldrdq_gather_shifted_offset_z_s64 (__ARM_mve_coerce_s64_ptr(p0, int64_t *), p1, p2), \
int (*)[__ARM_mve_type_uint64_t_ptr]: __arm_vldrdq_gather_shifted_offset_z_u64 (__ARM_mve_coerce_u64_ptr(p0, uint64_t *), p1, p2)))
-#define __arm_vadciq_m(p0,p1,p2,p3,p4) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vadciq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3, p4), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vadciq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3, p4));})
-
-#define __arm_vadciq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vadciq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vadciq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vadcq_m(p0,p1,p2,p3,p4) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vadcq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3, p4), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vadcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3, p4));})
-
-#define __arm_vadcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vadcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vadcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vsbciq_m(p0,p1,p2,p3,p4) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsbciq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3, p4), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsbciq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3, p4));})
-
-#define __arm_vsbciq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsbciq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsbciq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
-#define __arm_vsbcq_m(p0,p1,p2,p3,p4) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- __typeof(p2) __p2 = (p2); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)][__ARM_mve_typeid(__p2)])0, \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsbcq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), __ARM_mve_coerce(__p2, int32x4_t), p3, p4), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsbcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), __ARM_mve_coerce(__p2, uint32x4_t), p3, p4));})
-
-#define __arm_vsbcq(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int32x4_t][__ARM_mve_type_int32x4_t]: __arm_vsbcq_s32 (__ARM_mve_coerce(__p0, int32x4_t), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32x4_t]: __arm_vsbcq_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
#define __arm_vldrbq_gather_offset_z(p0,p1,p2) ({ __typeof(p1) __p1 = (p1); \
_Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vldrbq_gather_offset_z_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
@@ -9319,134 +4060,6 @@ extern void *__ARM_undef;
int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vldrbq_gather_offset_u16(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vldrbq_gather_offset_u32(__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
-#define __arm_vidupq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vidupq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), (uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vidupq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), (uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vidupq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), (uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint32_t_ptr]: __arm_vidupq_m_wb_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32_t_ptr]: __arm_vidupq_m_wb_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t_ptr]: __arm_vidupq_m_wb_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3));})
-
-#define __arm_vddupq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vddupq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), (uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vddupq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), (uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vddupq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), (uint32_t) __p1, p2, p3), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint32_t_ptr]: __arm_vddupq_m_wb_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32_t_ptr]: __arm_vddupq_m_wb_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t_ptr]: __arm_vddupq_m_wb_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3));})
-
-#define __arm_vidupq_u16(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vidupq_n_u16 ((uint32_t) __p0, p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vidupq_wb_u16 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1));})
-
-#define __arm_vidupq_u32(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vidupq_n_u32 ((uint32_t) __p0, p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vidupq_wb_u32 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1));})
-
-#define __arm_vidupq_u8(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vidupq_n_u8 ((uint32_t) __p0, p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vidupq_wb_u8 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1));})
-
-#define __arm_vddupq_u16(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vddupq_n_u16 ((uint32_t) __p0, p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vddupq_wb_u16 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1));})
-
-#define __arm_vddupq_u32(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vddupq_n_u32 ((uint32_t) __p0, p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vddupq_wb_u32 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1));})
-
-#define __arm_vddupq_u8(p0,p1) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vddupq_n_u8 ((uint32_t) __p0, p1), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vddupq_wb_u8 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1));})
-
-#define __arm_viwdupq_m(p0,p1,p2,p3,p4) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_viwdupq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce_i_scalar(__p1, int), p2, p3, p4), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_viwdupq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce_i_scalar(__p1, int), p2, p3, p4), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_viwdupq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce_i_scalar(__p1, int), p2, p3, p4), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_m_wb_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_m_wb_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_m_wb_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4));})
-
-#define __arm_viwdupq_u16(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_n_u16 (__ARM_mve_coerce_i_scalar(__p0, int), p1, (const int) p2), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_wb_u16 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, (const int) p2));})
-
-#define __arm_viwdupq_u32(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_n_u32 (__ARM_mve_coerce_i_scalar(__p0, int), p1, p2), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_wb_u32 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, p2));})
-
-#define __arm_viwdupq_u8(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_viwdupq_n_u8 (__ARM_mve_coerce_i_scalar(__p0, int), p1, p2), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_viwdupq_wb_u8 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, p2));})
-
-#define __arm_vdwdupq_m(p0,p1,p2,p3,p4) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_int_n]: __arm_vdwdupq_m_n_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce_i_scalar(__p1, int), p2, p3, p4), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_int_n]: __arm_vdwdupq_m_n_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce_i_scalar(__p1, int), p2, p3, p4), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_int_n]: __arm_vdwdupq_m_n_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce_i_scalar(__p1, int), p2, p3, p4), \
- int (*)[__ARM_mve_type_uint8x16_t][__ARM_mve_type_uint32_t_ptr]: __arm_vdwdupq_m_wb_u8 (__ARM_mve_coerce(__p0, uint8x16_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4), \
- int (*)[__ARM_mve_type_uint16x8_t][__ARM_mve_type_uint32_t_ptr]: __arm_vdwdupq_m_wb_u16 (__ARM_mve_coerce(__p0, uint16x8_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4), \
- int (*)[__ARM_mve_type_uint32x4_t][__ARM_mve_type_uint32_t_ptr]: __arm_vdwdupq_m_wb_u32 (__ARM_mve_coerce(__p0, uint32x4_t), __ARM_mve_coerce_u32_ptr(__p1, uint32_t *), p2, p3, p4));})
-
-#define __arm_vdwdupq_u16(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vdwdupq_n_u16 (__ARM_mve_coerce_i_scalar(__p0, int), p1, p2), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vdwdupq_wb_u16 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, p2));})
-
-#define __arm_vdwdupq_u32(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vdwdupq_n_u32 (__ARM_mve_coerce_i_scalar(__p0, int), p1, p2), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vdwdupq_wb_u32 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, p2));})
-
-#define __arm_vdwdupq_u8(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int_n]: __arm_vdwdupq_n_u8 (__ARM_mve_coerce_i_scalar(__p0, int), p1, p2), \
- int (*)[__ARM_mve_type_uint32_t_ptr]: __arm_vdwdupq_wb_u8 (__ARM_mve_coerce_u32_ptr(__p0, uint32_t *), p1, p2));})
-
-#define __arm_vshlcq_m(p0,p1,p2,p3) ({ __typeof(p0) __p0 = (p0); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)])0, \
- int (*)[__ARM_mve_type_int8x16_t]: __arm_vshlcq_m_s8 (__ARM_mve_coerce(__p0, int8x16_t), p1, p2, p3), \
- int (*)[__ARM_mve_type_int16x8_t]: __arm_vshlcq_m_s16 (__ARM_mve_coerce(__p0, int16x8_t), p1, p2, p3), \
- int (*)[__ARM_mve_type_int32x4_t]: __arm_vshlcq_m_s32 (__ARM_mve_coerce(__p0, int32x4_t), p1, p2, p3), \
- int (*)[__ARM_mve_type_uint8x16_t]: __arm_vshlcq_m_u8 (__ARM_mve_coerce(__p0, uint8x16_t), p1, p2, p3), \
- int (*)[__ARM_mve_type_uint16x8_t]: __arm_vshlcq_m_u16 (__ARM_mve_coerce(__p0, uint16x8_t), p1, p2, p3), \
- int (*)[__ARM_mve_type_uint32x4_t]: __arm_vshlcq_m_u32 (__ARM_mve_coerce(__p0, uint32x4_t), p1, p2, p3));})
-
-#define __arm_vstrbq(p0,p1) ({ __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_s8 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrbq_s16 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int16x8_t)), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrbq_s32 (__ARM_mve_coerce_s8_ptr(p0, int8_t *), __ARM_mve_coerce(__p1, int32x4_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_u8 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_u16 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t)), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_u32 (__ARM_mve_coerce_u8_ptr(p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t)));})
-
-#define __arm_vstrbq_p(p0,p1,p2) ({ __typeof(p0) __p0 = (p0); \
- __typeof(p1) __p1 = (p1); \
- _Generic( (int (*)[__ARM_mve_typeid(__p0)][__ARM_mve_typeid(__p1)])0, \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int8x16_t]: __arm_vstrbq_p_s8 (__ARM_mve_coerce_s8_ptr(__p0, int8_t *), __ARM_mve_coerce(__p1, int8x16_t), p2), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int16x8_t]: __arm_vstrbq_p_s16 (__ARM_mve_coerce_s8_ptr(__p0, int8_t *), __ARM_mve_coerce(__p1, int16x8_t), p2), \
- int (*)[__ARM_mve_type_int8_t_ptr][__ARM_mve_type_int32x4_t]: __arm_vstrbq_p_s32 (__ARM_mve_coerce_s8_ptr(__p0, int8_t *), __ARM_mve_coerce(__p1, int32x4_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint8x16_t]: __arm_vstrbq_p_u8 (__ARM_mve_coerce_u8_ptr(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint8x16_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint16x8_t]: __arm_vstrbq_p_u16 (__ARM_mve_coerce_u8_ptr(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint16x8_t), p2), \
- int (*)[__ARM_mve_type_uint8_t_ptr][__ARM_mve_type_uint32x4_t]: __arm_vstrbq_p_u32 (__ARM_mve_coerce_u8_ptr(__p0, uint8_t *), __ARM_mve_coerce(__p1, uint32x4_t), p2));})
-
#define __arm_vstrdq_scatter_base(p0,p1,p2) ({ __typeof(p2) __p2 = (p2); \
_Generic( (int (*)[__ARM_mve_typeid(__p2)])0, \
int (*)[__ARM_mve_type_int64x2_t]: __arm_vstrdq_scatter_base_s64 (p0, p1, __ARM_mve_coerce(__p2, int64x2_t)), \
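
Note: the deletions above remove the _Generic-based polymorphic wrapper macros (vornq, vbicq, vld1q_z, vst1q_p, vstrhq, vstrwq, vadcq/vsbcq, vidupq/vddupq/viwdupq/vdwdupq, vshlcq, vstrbq, ...) from arm_mve.h; overload resolution for these intrinsics presumably now happens in the MVE builtins framework rather than in the header. For readers unfamiliar with the idiom being retired, the following stand-alone sketch (hypothetical my_* names, not GCC code; compile with GCC, since it uses __typeof and statement expressions just as arm_mve.h does) shows how such a macro selects a type-suffixed implementation at compile time: each argument type is mapped to a small integer constant, the constant becomes an array bound, and _Generic then dispatches on the resulting pointer-to-array type.

    /* Minimal sketch of the _Generic dispatch idiom used by the deleted
       wrappers.  All names here are illustrative, not GCC's.  */
    #include <stdio.h>

    typedef struct { int lane[4]; } my_int32x4_t;       /* stand-ins for MVE types */
    typedef struct { unsigned lane[4]; } my_uint32x4_t;

    #define MY_TYPE_INT32X4  1
    #define MY_TYPE_UINT32X4 2

    /* Analogue of __ARM_mve_typeid: map a C type to an integer constant.  */
    #define my_typeid(x) _Generic ((x),           \
      my_int32x4_t:  MY_TYPE_INT32X4,             \
      my_uint32x4_t: MY_TYPE_UINT32X4)

    static my_int32x4_t my_addq_s32 (my_int32x4_t a, my_int32x4_t b)
    {
      my_int32x4_t r;
      for (int i = 0; i < 4; i++)
        r.lane[i] = a.lane[i] + b.lane[i];
      return r;
    }

    static my_uint32x4_t my_addq_u32 (my_uint32x4_t a, my_uint32x4_t b)
    {
      my_uint32x4_t r;
      for (int i = 0; i < 4; i++)
        r.lane[i] = a.lane[i] + b.lane[i];
      return r;
    }

    /* Analogue of the deleted wrappers: the controlling expression is a
       pointer to an array whose bound is the argument's typeid, so each
       argument type selects exactly one suffixed implementation.  */
    #define my_addq(p0, p1) ({ __typeof(p0) __p0 = (p0);          \
      __typeof(p1) __p1 = (p1);                                   \
      _Generic ((int (*)[my_typeid (__p0)]) 0,                    \
        int (*)[MY_TYPE_INT32X4]:  my_addq_s32,                   \
        int (*)[MY_TYPE_UINT32X4]: my_addq_u32) (__p0, __p1); })

    int main (void)
    {
      my_int32x4_t a = {{1, 2, 3, 4}}, b = {{10, 20, 30, 40}};
      my_int32x4_t c = my_addq (a, b);   /* dispatches to my_addq_s32 */
      printf ("%d %d %d %d\n", c.lane[0], c.lane[1], c.lane[2], c.lane[3]);
      return 0;
    }

The real wrappers additionally coerce each argument through __ARM_mve_coerce before the call; the sketch omits that to keep the dispatch mechanism visible.
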
diff --git a/gcc/config/arm/arm_mve_builtins.def b/gcc/config/arm/arm_mve_builtins.def
index f141aab..5a0c760 100644
--- a/gcc/config/arm/arm_mve_builtins.def
+++ b/gcc/config/arm/arm_mve_builtins.def
@@ -27,7 +27,7 @@ VAR2 (UNOP_NONE_NONE, vrndmq_f, v8hf, v4sf)
VAR2 (UNOP_NONE_NONE, vrndaq_f, v8hf, v4sf)
VAR2 (UNOP_NONE_NONE, vrev64q_f, v8hf, v4sf)
VAR2 (UNOP_NONE_NONE, vnegq_f, v8hf, v4sf)
-VAR2 (UNOP_NONE_NONE, vdupq_n_f, v8hf, v4sf)
+VAR5 (UNOP_NONE_NONE, vdupq_n, v8hf, v4sf, v16qi, v8hi, v4si)
VAR2 (UNOP_NONE_NONE, vabsq_f, v8hf, v4sf)
VAR1 (UNOP_NONE_NONE, vrev32q_f, v8hf)
VAR1 (UNOP_NONE_NONE, vcvttq_f32_f16, v4sf)
@@ -39,7 +39,6 @@ VAR3 (UNOP_SNONE_SNONE, vqnegq_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vqabsq_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vnegq_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vmvnq_s, v16qi, v8hi, v4si)
-VAR3 (UNOP_SNONE_SNONE, vdupq_n_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vclzq_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vclsq_s, v16qi, v8hi, v4si)
VAR3 (UNOP_SNONE_SNONE, vaddvq_s, v16qi, v8hi, v4si)
@@ -57,7 +56,6 @@ VAR1 (UNOP_SNONE_SNONE, vrev16q_s, v16qi)
VAR1 (UNOP_SNONE_SNONE, vaddlvq_s, v4si)
VAR3 (UNOP_UNONE_UNONE, vrev64q_u, v16qi, v8hi, v4si)
VAR3 (UNOP_UNONE_UNONE, vmvnq_u, v16qi, v8hi, v4si)
-VAR3 (UNOP_UNONE_UNONE, vdupq_n_u, v16qi, v8hi, v4si)
VAR3 (UNOP_UNONE_UNONE, vclzq_u, v16qi, v8hi, v4si)
VAR3 (UNOP_UNONE_UNONE, vaddvq_u, v16qi, v8hi, v4si)
VAR2 (UNOP_UNONE_UNONE, vrev32q_u, v16qi, v8hi)
@@ -288,15 +286,11 @@ VAR1 (TERNOP_UNONE_UNONE_UNONE_UNONE, vrmlaldavhaq_u, v4si)
VAR2 (TERNOP_NONE_NONE_UNONE_PRED, vcvtq_m_to_f_u, v8hf, v4sf)
VAR2 (TERNOP_NONE_NONE_NONE_PRED, vcvtq_m_to_f_s, v8hf, v4sf)
VAR2 (TERNOP_PRED_NONE_NONE_PRED, vcmpeqq_m_f, v8hf, v4sf)
-VAR3 (TERNOP_UNONE_NONE_UNONE_IMM, vshlcq_carry_s, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vshlcq_carry_u, v16qi, v8hi, v4si)
VAR2 (TERNOP_UNONE_UNONE_NONE_IMM, vqrshrunbq_n_s, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_NONE_NONE, vabavq_s, v16qi, v8hi, v4si)
VAR3 (TERNOP_UNONE_UNONE_UNONE_UNONE, vabavq_u, v16qi, v8hi, v4si)
VAR2 (TERNOP_UNONE_UNONE_NONE_PRED, vcvtaq_m_u, v8hi, v4si)
VAR2 (TERNOP_NONE_NONE_NONE_PRED, vcvtaq_m_s, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vshlcq_vec_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_NONE_NONE_UNONE_IMM, vshlcq_vec_s, v16qi, v8hi, v4si)
VAR4 (TERNOP_UNONE_UNONE_UNONE_PRED, vpselq_u, v16qi, v8hi, v4si, v2di)
VAR4 (TERNOP_NONE_NONE_NONE_PRED, vpselq_s, v16qi, v8hi, v4si, v2di)
VAR3 (TERNOP_UNONE_UNONE_UNONE_PRED, vrev64q_m_u, v16qi, v8hi, v4si)
@@ -669,20 +663,14 @@ VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vandq_m_f, v8hf, v4sf)
VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_n_f, v8hf, v4sf)
VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vaddq_m_f, v8hf, v4sf)
VAR2 (QUADOP_NONE_NONE_NONE_NONE_PRED, vabdq_m_f, v8hf, v4sf)
-VAR3 (STRS, vstrbq_s, v16qi, v8hi, v4si)
-VAR3 (STRU, vstrbq_u, v16qi, v8hi, v4si)
VAR3 (STRSS, vstrbq_scatter_offset_s, v16qi, v8hi, v4si)
VAR3 (STRSU, vstrbq_scatter_offset_u, v16qi, v8hi, v4si)
VAR1 (STRSBS, vstrwq_scatter_base_s, v4si)
VAR1 (STRSBU, vstrwq_scatter_base_u, v4si)
VAR3 (LDRGU, vldrbq_gather_offset_u, v16qi, v8hi, v4si)
VAR3 (LDRGS, vldrbq_gather_offset_s, v16qi, v8hi, v4si)
-VAR3 (LDRS, vldrbq_s, v16qi, v8hi, v4si)
-VAR3 (LDRU, vldrbq_u, v16qi, v8hi, v4si)
VAR1 (LDRGBS, vldrwq_gather_base_s, v4si)
VAR1 (LDRGBU, vldrwq_gather_base_u, v4si)
-VAR3 (STRS_P, vstrbq_p_s, v16qi, v8hi, v4si)
-VAR3 (STRU_P, vstrbq_p_u, v16qi, v8hi, v4si)
VAR3 (STRSS_P, vstrbq_scatter_offset_p_s, v16qi, v8hi, v4si)
VAR3 (STRSU_P, vstrbq_scatter_offset_p_u, v16qi, v8hi, v4si)
VAR1 (STRSBS_P, vstrwq_scatter_base_p_s, v4si)
@@ -691,15 +679,6 @@ VAR1 (LDRGBS_Z, vldrwq_gather_base_z_s, v4si)
VAR1 (LDRGBU_Z, vldrwq_gather_base_z_u, v4si)
VAR3 (LDRGS_Z, vldrbq_gather_offset_z_s, v16qi, v8hi, v4si)
VAR3 (LDRGU_Z, vldrbq_gather_offset_z_u, v16qi, v8hi, v4si)
-VAR3 (LDRS_Z, vldrbq_z_s, v16qi, v8hi, v4si)
-VAR3 (LDRU_Z, vldrbq_z_u, v16qi, v8hi, v4si)
-VAR3 (LDRU, vld1q_u, v16qi, v8hi, v4si)
-VAR3 (LDRS, vld1q_s, v16qi, v8hi, v4si)
-VAR2 (LDRU_Z, vldrhq_z_u, v8hi, v4si)
-VAR2 (LDRU, vldrhq_u, v8hi, v4si)
-VAR2 (LDRS_Z, vldrhq_z_s, v8hi, v4si)
-VAR2 (LDRS, vldrhq_s, v8hi, v4si)
-VAR2 (LDRS, vld1q_f, v8hf, v4sf)
VAR2 (LDRGU_Z, vldrhq_gather_shifted_offset_z_u, v8hi, v4si)
VAR2 (LDRGU_Z, vldrhq_gather_offset_z_u, v8hi, v4si)
VAR2 (LDRGU, vldrhq_gather_shifted_offset_u, v8hi, v4si)
@@ -708,14 +687,6 @@ VAR2 (LDRGS_Z, vldrhq_gather_shifted_offset_z_s, v8hi, v4si)
VAR2 (LDRGS_Z, vldrhq_gather_offset_z_s, v8hi, v4si)
VAR2 (LDRGS, vldrhq_gather_shifted_offset_s, v8hi, v4si)
VAR2 (LDRGS, vldrhq_gather_offset_s, v8hi, v4si)
-VAR1 (LDRS, vldrhq_f, v8hf)
-VAR1 (LDRS_Z, vldrhq_z_f, v8hf)
-VAR1 (LDRS, vldrwq_f, v4sf)
-VAR1 (LDRS, vldrwq_s, v4si)
-VAR1 (LDRU, vldrwq_u, v4si)
-VAR1 (LDRS_Z, vldrwq_z_f, v4sf)
-VAR1 (LDRS_Z, vldrwq_z_s, v4si)
-VAR1 (LDRU_Z, vldrwq_z_u, v4si)
VAR1 (LDRGBS, vldrdq_gather_base_s, v2di)
VAR1 (LDRGBS, vldrwq_gather_base_f, v4sf)
VAR1 (LDRGBS_Z, vldrdq_gather_base_z_s, v2di)
@@ -746,13 +717,6 @@ VAR1 (LDRGU_Z, vldrdq_gather_offset_z_u, v2di)
VAR1 (LDRGU_Z, vldrdq_gather_shifted_offset_z_u, v2di)
VAR1 (LDRGU_Z, vldrwq_gather_offset_z_u, v4si)
VAR1 (LDRGU_Z, vldrwq_gather_shifted_offset_z_u, v4si)
-VAR3 (STRU, vst1q_u, v16qi, v8hi, v4si)
-VAR3 (STRS, vst1q_s, v16qi, v8hi, v4si)
-VAR2 (STRU_P, vstrhq_p_u, v8hi, v4si)
-VAR2 (STRU, vstrhq_u, v8hi, v4si)
-VAR2 (STRS_P, vstrhq_p_s, v8hi, v4si)
-VAR2 (STRS, vstrhq_s, v8hi, v4si)
-VAR2 (STRS, vst1q_f, v8hf, v4sf)
VAR2 (STRSU_P, vstrhq_scatter_shifted_offset_p_u, v8hi, v4si)
VAR2 (STRSU_P, vstrhq_scatter_offset_p_u, v8hi, v4si)
VAR2 (STRSU, vstrhq_scatter_shifted_offset_u, v8hi, v4si)
@@ -761,14 +725,6 @@ VAR2 (STRSS_P, vstrhq_scatter_shifted_offset_p_s, v8hi, v4si)
VAR2 (STRSS_P, vstrhq_scatter_offset_p_s, v8hi, v4si)
VAR2 (STRSS, vstrhq_scatter_shifted_offset_s, v8hi, v4si)
VAR2 (STRSS, vstrhq_scatter_offset_s, v8hi, v4si)
-VAR1 (STRS, vstrhq_f, v8hf)
-VAR1 (STRS_P, vstrhq_p_f, v8hf)
-VAR1 (STRS, vstrwq_f, v4sf)
-VAR1 (STRS, vstrwq_s, v4si)
-VAR1 (STRU, vstrwq_u, v4si)
-VAR1 (STRS_P, vstrwq_p_f, v4sf)
-VAR1 (STRS_P, vstrwq_p_s, v4si)
-VAR1 (STRU_P, vstrwq_p_u, v4si)
VAR1 (STRSBS, vstrdq_scatter_base_s, v2di)
VAR1 (STRSBS, vstrwq_scatter_base_f, v4sf)
VAR1 (STRSBS_P, vstrdq_scatter_base_p_s, v2di)
@@ -799,18 +755,6 @@ VAR1 (STRSU_P, vstrdq_scatter_offset_p_u, v2di)
VAR1 (STRSU_P, vstrdq_scatter_shifted_offset_p_u, v2di)
VAR1 (STRSU_P, vstrwq_scatter_offset_p_u, v4si)
VAR1 (STRSU_P, vstrwq_scatter_shifted_offset_p_u, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_wb_u, v16qi, v4si, v8hi)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_wb_u, v16qi, v4si, v8hi)
-VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, viwdupq_m_wb_u, v16qi, v8hi, v4si)
-VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, vdwdupq_m_wb_u, v16qi, v8hi, v4si)
-VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, viwdupq_m_n_u, v16qi, v8hi, v4si)
-VAR3 (QUINOP_UNONE_UNONE_UNONE_UNONE_IMM_PRED, vdwdupq_m_n_u, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_UNONE_IMM, vddupq_n_u, v16qi, v8hi, v4si)
-VAR3 (BINOP_UNONE_UNONE_IMM, vidupq_n_u, v16qi, v8hi, v4si)
-VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vddupq_m_n_u, v16qi, v8hi, v4si)
-VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vidupq_m_n_u, v16qi, v8hi, v4si)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, vdwdupq_n_u, v16qi, v4si, v8hi)
-VAR3 (TERNOP_UNONE_UNONE_UNONE_IMM, viwdupq_n_u, v16qi, v4si, v8hi)
VAR1 (STRSBWBU, vstrwq_scatter_base_wb_u, v4si)
VAR1 (STRSBWBU, vstrdq_scatter_base_wb_u, v2di)
VAR1 (STRSBWBU_P, vstrwq_scatter_base_wb_p_u, v4si)
@@ -874,7 +818,3 @@ VAR1 (UQSHL, urshr_, si)
VAR1 (UQSHL, urshrl_, di)
VAR1 (UQSHL, uqshl_, si)
VAR1 (UQSHL, uqshll_, di)
-VAR3 (QUADOP_NONE_NONE_UNONE_IMM_PRED, vshlcq_m_vec_s, v16qi, v8hi, v4si)
-VAR3 (QUADOP_NONE_NONE_UNONE_IMM_PRED, vshlcq_m_carry_s, v16qi, v8hi, v4si)
-VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshlcq_m_vec_u, v16qi, v8hi, v4si)
-VAR3 (QUADOP_UNONE_UNONE_UNONE_IMM_PRED, vshlcq_m_carry_u, v16qi, v8hi, v4si)
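The entries dropped above are the legacy builtin-table definitions for the contiguous loads and stores (vld1q/vst1q, vldrb/h/wq, vstrb/h/wq and their predicated forms), the vidup/vddup/viwdup/vdwdup family and the predicated vshlc variants; the unified patterns added to mve.md later in this patch take over that work, so the user-facing intrinsics are unchanged. As a rough user-level sketch of the increment-dup intrinsics involved (illustration only, not part of the patch; assumes an MVE-enabled compiler, e.g. -march=armv8.1-m.main+mve):

#include <arm_mve.h>

/* {start, start+1, ..., start+15}; the step immediate must be 1, 2, 4 or 8.  */
uint8x16_t make_index (uint32_t start)
{
  return vidupq_n_u8 (start, 1);
}

/* Increment by 4 per lane, wrapping back to 0 when the value reaches 'wrap'.  */
uint16x8_t make_wrapping_index (uint32_t start, uint32_t wrap)
{
  return viwdupq_n_u16 (start, wrap, 4);
}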
diff --git a/gcc/config/arm/bpabi.h b/gcc/config/arm/bpabi.h
index 7a279f3..fd4b65a 100644
--- a/gcc/config/arm/bpabi.h
+++ b/gcc/config/arm/bpabi.h
@@ -1,6 +1,6 @@
/* Configuration file for ARM BPABI targets.
Copyright (C) 2004-2024 Free Software Foundation, Inc.
- Contributed by CodeSourcery, LLC
+ Contributed by CodeSourcery, LLC
This file is part of GCC.
@@ -55,7 +55,7 @@
#define TARGET_FIX_V4BX_SPEC " %{mcpu=arm8|mcpu=arm810|mcpu=strongarm*"\
"|march=armv4|mcpu=fa526|mcpu=fa626:--fix-v4bx}"
-#define TARGET_FDPIC_ASM_SPEC ""
+#define TARGET_FDPIC_ASM_SPEC "%{mfdpic: --fdpic}"
#define BE8_LINK_SPEC \
"%{!r:%{!mbe32:%:be8_linkopt(%{mlittle-endian:little}" \
diff --git a/gcc/config/arm/elf.h b/gcc/config/arm/elf.h
index 97230d1..5f176de 100644
--- a/gcc/config/arm/elf.h
+++ b/gcc/config/arm/elf.h
@@ -3,7 +3,7 @@
Copyright (C) 1995-2024 Free Software Foundation, Inc.
Contributed by Philip Blundell <philb@gnu.org> and
Catherine Moore <clm@cygnus.com>
-
+
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify it
@@ -111,7 +111,7 @@
#ifndef LINK_SPEC
#define LINK_SPEC "%{mbig-endian:-EB} %{mlittle-endian:-EL} -X"
#endif
-
+
/* Run-time Target Specification. */
#ifndef TARGET_DEFAULT
#define TARGET_DEFAULT (MASK_APCS_FRAME)
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index b9ff01c..22f8c18 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -283,6 +283,14 @@
(define_mode_iterator MVE_V8HF [V8HF])
(define_mode_iterator MVE_V16QI [V16QI])
+;; Types for MVE truncating stores and widening loads
+(define_mode_iterator MVE_w_narrow_TYPE [V8QI V4QI V4HI])
+(define_mode_attr MVE_w_narrow_type [(V8QI "v8qi") (V4QI "v4qi") (V4HI "v4hi")])
+(define_mode_attr MVE_wide_n_TYPE [(V8QI "V8HI") (V4QI "V4SI") (V4HI "V4SI")])
+(define_mode_attr MVE_wide_n_type [(V8QI "v8hi") (V4QI "v4si") (V4HI "v4si")])
+(define_mode_attr MVE_wide_n_sz_elem [(V8QI "16") (V4QI "32") (V4HI "32")])
+(define_mode_attr MVE_wide_n_VPRED [(V8QI "V8BI") (V4QI "V4BI") (V4HI "V4BI")])
+
;;----------------------------------------------------------------------------
;; Code iterators
;;----------------------------------------------------------------------------
@@ -444,6 +452,7 @@
VANDQ_M_S VANDQ_M_U
VBICQ_M_S VBICQ_M_U
VEORQ_M_S VEORQ_M_U
+ VORNQ_M_S VORNQ_M_U
VORRQ_M_S VORRQ_M_U
])
@@ -594,6 +603,7 @@
VANDQ_M_F
VBICQ_M_F
VEORQ_M_F
+ VORNQ_M_F
VORRQ_M_F
])
@@ -939,6 +949,10 @@
(VABDQ_S "vabd") (VABDQ_U "vabd") (VABDQ_F "vabd")
(VABSQ_M_F "vabs")
(VABSQ_M_S "vabs")
+ (VADCIQ_M_S "vadci") (VADCIQ_M_U "vadci")
+ (VADCIQ_S "vadci") (VADCIQ_U "vadci")
+ (VADCQ_M_S "vadc") (VADCQ_M_U "vadc")
+ (VADCQ_S "vadc") (VADCQ_U "vadc")
(VADDLVAQ_P_S "vaddlva") (VADDLVAQ_P_U "vaddlva")
(VADDLVAQ_S "vaddlva") (VADDLVAQ_U "vaddlva")
(VADDLVQ_P_S "vaddlv") (VADDLVQ_P_U "vaddlv")
@@ -964,6 +978,26 @@
(VCMLAQ_M_F "vcmla") (VCMLAQ_ROT90_M_F "vcmla") (VCMLAQ_ROT180_M_F "vcmla") (VCMLAQ_ROT270_M_F "vcmla")
(VCMULQ_M_F "vcmul") (VCMULQ_ROT90_M_F "vcmul") (VCMULQ_ROT180_M_F "vcmul") (VCMULQ_ROT270_M_F "vcmul")
(VCREATEQ_S "vcreate") (VCREATEQ_U "vcreate") (VCREATEQ_F "vcreate")
+ (VCVTAQ_M_S "vcvta") (VCVTAQ_M_U "vcvta")
+ (VCVTAQ_S "vcvta") (VCVTAQ_U "vcvta")
+ (VCVTBQ_F16_F32 "vcvtb") (VCVTTQ_F16_F32 "vcvtt")
+ (VCVTBQ_F32_F16 "vcvtb") (VCVTTQ_F32_F16 "vcvtt")
+ (VCVTBQ_M_F16_F32 "vcvtb") (VCVTTQ_M_F16_F32 "vcvtt")
+ (VCVTBQ_M_F32_F16 "vcvtb") (VCVTTQ_M_F32_F16 "vcvtt")
+ (VCVTMQ_M_S "vcvtm") (VCVTMQ_M_U "vcvtm")
+ (VCVTMQ_S "vcvtm") (VCVTMQ_U "vcvtm")
+ (VCVTNQ_M_S "vcvtn") (VCVTNQ_M_U "vcvtn")
+ (VCVTNQ_S "vcvtn") (VCVTNQ_U "vcvtn")
+ (VCVTPQ_M_S "vcvtp") (VCVTPQ_M_U "vcvtp")
+ (VCVTPQ_S "vcvtp") (VCVTPQ_U "vcvtp")
+ (VCVTQ_FROM_F_S "vcvt") (VCVTQ_FROM_F_U "vcvt")
+ (VCVTQ_M_FROM_F_S "vcvt") (VCVTQ_M_FROM_F_U "vcvt")
+ (VCVTQ_M_N_FROM_F_S "vcvt") (VCVTQ_M_N_FROM_F_U "vcvt")
+ (VCVTQ_M_N_TO_F_S "vcvt") (VCVTQ_M_N_TO_F_U "vcvt")
+ (VCVTQ_M_TO_F_S "vcvt") (VCVTQ_M_TO_F_U "vcvt")
+ (VCVTQ_N_FROM_F_S "vcvt") (VCVTQ_N_FROM_F_U "vcvt")
+ (VCVTQ_N_TO_F_S "vcvt") (VCVTQ_N_TO_F_U "vcvt")
+ (VCVTQ_TO_F_S "vcvt") (VCVTQ_TO_F_U "vcvt")
(VDUPQ_M_N_S "vdup") (VDUPQ_M_N_U "vdup") (VDUPQ_M_N_F "vdup")
(VDUPQ_N_S "vdup") (VDUPQ_N_U "vdup") (VDUPQ_N_F "vdup")
(VEORQ_M_S "veor") (VEORQ_M_U "veor") (VEORQ_M_F "veor")
@@ -985,6 +1019,10 @@
(VHSUBQ_M_S "vhsub") (VHSUBQ_M_U "vhsub")
(VHSUBQ_N_S "vhsub") (VHSUBQ_N_U "vhsub")
(VHSUBQ_S "vhsub") (VHSUBQ_U "vhsub")
+ (VIDUPQ "vidup") (VDDUPQ "vddup")
+ (VIDUPQ_M "vidup") (VDDUPQ_M "vddup")
+ (VIWDUPQ "viwdup") (VDWDUPQ "vdwdup")
+ (VIWDUPQ_M "viwdup") (VDWDUPQ_M "vdwdup")
(VMAXAQ_M_S "vmaxa")
(VMAXAQ_S "vmaxa")
(VMAXAVQ_P_S "vmaxav")
@@ -1074,6 +1112,7 @@
(VMVNQ_N_S "vmvn") (VMVNQ_N_U "vmvn")
(VNEGQ_M_F "vneg")
(VNEGQ_M_S "vneg")
+ (VORNQ_M_S "vorn") (VORNQ_M_U "vorn") (VORNQ_M_F "vorn")
(VORRQ_M_N_S "vorr") (VORRQ_M_N_U "vorr")
(VORRQ_M_S "vorr") (VORRQ_M_U "vorr") (VORRQ_M_F "vorr")
(VORRQ_N_S "vorr") (VORRQ_N_U "vorr")
@@ -1208,6 +1247,10 @@
(VRSHRNTQ_N_S "vrshrnt") (VRSHRNTQ_N_U "vrshrnt")
(VRSHRQ_M_N_S "vrshr") (VRSHRQ_M_N_U "vrshr")
(VRSHRQ_N_S "vrshr") (VRSHRQ_N_U "vrshr")
+ (VSBCIQ_M_S "vsbci") (VSBCIQ_M_U "vsbci")
+ (VSBCIQ_S "vsbci") (VSBCIQ_U "vsbci")
+ (VSBCQ_M_S "vsbc") (VSBCQ_M_U "vsbc")
+ (VSBCQ_S "vsbc") (VSBCQ_U "vsbc")
(VSHLLBQ_M_N_S "vshllb") (VSHLLBQ_M_N_U "vshllb")
(VSHLLBQ_N_S "vshllb") (VSHLLBQ_N_U "vshllb")
(VSHLLTQ_M_N_S "vshllt") (VSHLLTQ_M_N_U "vshllt")
@@ -1317,6 +1360,9 @@
(VRNDXQ_F "vrintx") (VRNDXQ_M_F "vrintx")
])
+(define_int_attr viddupq_op [ (VIDUPQ "plus") (VDDUPQ "minus")])
+(define_int_attr viddupq_m_op [ (VIDUPQ_M "plus") (VDDUPQ_M "minus")])
+
;; plus and minus are the only SHIFTABLE_OPS for which Thumb2 allows
;; a stack pointer operand. The minus operation is a candidate for an rsub
;; and hence only plus is supported.
@@ -1769,6 +1815,10 @@
(V2SF "s") (V4SF "s")
(V2SF "s") (V4SF "s")])
+(define_mode_attr MVE_elem_ch [(V4QI "b") (V8QI "b") (V16QI "b")
+ (V4HI "h") (V8HI "h") (V8HF "h")
+ (V4SI "w") (V4SF "w")])
+
(define_mode_attr VH_elem_ch [(V4HI "s") (V8HI "s")
(V4HF "s") (V8HF "s")
(HF "s")])
@@ -2472,19 +2522,16 @@
(VMLALDAVAXQ_P_S "s")
(VMLALDAVAQ_P_S "s") (VMLALDAVAQ_P_U "u")
(VSTRWQSB_S "s") (VSTRWQSB_U "u") (VSTRBQSO_S "s")
- (VSTRBQSO_U "u") (VSTRBQ_S "s") (VSTRBQ_U "u")
- (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRBQ_S "s")
- (VLDRBQ_U "u") (VLDRWQGB_S "s") (VLDRWQGB_U "u")
- (VLD1Q_S "s") (VLD1Q_U "u") (VLDRHQGO_S "s")
+ (VSTRBQSO_U "u")
+ (VLDRBQGO_S "s") (VLDRBQGO_U "u") (VLDRWQGB_S "s")
+ (VLDRWQGB_U "u") (VLDRHQGO_S "s")
(VLDRHQGO_U "u") (VLDRHQGSO_S "s") (VLDRHQGSO_U "u")
- (VLDRHQ_S "s") (VLDRHQ_U "u") (VLDRWQ_S "s")
- (VLDRWQ_U "u") (VLDRDQGB_S "s") (VLDRDQGB_U "u")
+ (VLDRDQGB_S "s") (VLDRDQGB_U "u")
(VLDRDQGO_S "s") (VLDRDQGO_U "u") (VLDRDQGSO_S "s")
(VLDRDQGSO_U "u") (VLDRWQGO_S "s") (VLDRWQGO_U "u")
- (VLDRWQGSO_S "s") (VLDRWQGSO_U "u") (VST1Q_S "s")
- (VST1Q_U "u") (VSTRHQSO_S "s") (VSTRHQSO_U "u")
- (VSTRHQSSO_S "s") (VSTRHQSSO_U "u") (VSTRHQ_S "s")
- (VSTRHQ_U "u") (VSTRWQ_S "s") (VSTRWQ_U "u")
+ (VLDRWQGSO_S "s") (VLDRWQGSO_U "u")
+ (VSTRHQSO_S "s") (VSTRHQSO_U "u")
+ (VSTRHQSSO_S "s") (VSTRHQSSO_U "u")
(VSTRDQSB_S "s") (VSTRDQSB_U "u") (VSTRDQSO_S "s")
(VSTRDQSO_U "u") (VSTRDQSSO_S "s") (VSTRDQSSO_U "u")
(VSTRWQSO_U "u") (VSTRWQSO_S "s") (VSTRWQSSO_U "u")
@@ -2720,14 +2767,10 @@
(define_int_iterator VREV64Q [VREV64Q_S VREV64Q_U])
(define_int_iterator VCVTQ_FROM_F [VCVTQ_FROM_F_S VCVTQ_FROM_F_U])
(define_int_iterator VREV16Q [VREV16Q_U VREV16Q_S])
-(define_int_iterator VCVTAQ [VCVTAQ_U VCVTAQ_S])
(define_int_iterator VDUPQ_N [VDUPQ_N_U VDUPQ_N_S])
(define_int_iterator VADDVQ [VADDVQ_U VADDVQ_S])
(define_int_iterator VREV32Q [VREV32Q_U VREV32Q_S])
(define_int_iterator VMOVLxQ [VMOVLBQ_S VMOVLBQ_U VMOVLTQ_U VMOVLTQ_S])
-(define_int_iterator VCVTPQ [VCVTPQ_S VCVTPQ_U])
-(define_int_iterator VCVTNQ [VCVTNQ_S VCVTNQ_U])
-(define_int_iterator VCVTMQ [VCVTMQ_S VCVTMQ_U])
(define_int_iterator VADDLVQ [VADDLVQ_U VADDLVQ_S])
(define_int_iterator VCVTQ_N_TO_F [VCVTQ_N_TO_F_S VCVTQ_N_TO_F_U])
(define_int_iterator VCREATEQ [VCREATEQ_U VCREATEQ_S])
@@ -2783,7 +2826,6 @@
(define_int_iterator VSHLLxQ_N [VSHLLBQ_N_S VSHLLBQ_N_U VSHLLTQ_N_S VSHLLTQ_N_U])
(define_int_iterator VRMLALDAVHQ [VRMLALDAVHQ_U VRMLALDAVHQ_S])
(define_int_iterator VBICQ_M_N [VBICQ_M_N_S VBICQ_M_N_U])
-(define_int_iterator VCVTAQ_M [VCVTAQ_M_S VCVTAQ_M_U])
(define_int_iterator VCVTQ_M_TO_F [VCVTQ_M_TO_F_S VCVTQ_M_TO_F_U])
(define_int_iterator VQRSHRNBQ_N [VQRSHRNBQ_N_U VQRSHRNBQ_N_S])
(define_int_iterator VABAVQ [VABAVQ_S VABAVQ_U])
@@ -2833,9 +2875,6 @@
(define_int_iterator VMVNQ_M_N [VMVNQ_M_N_U VMVNQ_M_N_S])
(define_int_iterator VQSHRNTQ_N [VQSHRNTQ_N_U VQSHRNTQ_N_S])
(define_int_iterator VSHRNTQ_N [VSHRNTQ_N_S VSHRNTQ_N_U])
-(define_int_iterator VCVTMQ_M [VCVTMQ_M_S VCVTMQ_M_U])
-(define_int_iterator VCVTNQ_M [VCVTNQ_M_S VCVTNQ_M_U])
-(define_int_iterator VCVTPQ_M [VCVTPQ_M_S VCVTPQ_M_U])
(define_int_iterator VCVTQ_M_N_FROM_F [VCVTQ_M_N_FROM_F_S VCVTQ_M_N_FROM_F_U])
(define_int_iterator VCVTQ_M_FROM_F [VCVTQ_M_FROM_F_U VCVTQ_M_FROM_F_S])
(define_int_iterator VRMLALDAVHQ_P [VRMLALDAVHQ_P_S VRMLALDAVHQ_P_U])
@@ -2899,25 +2938,17 @@
(define_int_iterator VSHRNTQ_M_N [VSHRNTQ_M_N_S VSHRNTQ_M_N_U])
(define_int_iterator VSTRWSBQ [VSTRWQSB_S VSTRWQSB_U])
(define_int_iterator VSTRBSOQ [VSTRBQSO_S VSTRBQSO_U])
-(define_int_iterator VSTRBQ [VSTRBQ_S VSTRBQ_U])
(define_int_iterator VLDRBGOQ [VLDRBQGO_S VLDRBQGO_U])
-(define_int_iterator VLDRBQ [VLDRBQ_S VLDRBQ_U])
(define_int_iterator VLDRWGBQ [VLDRWQGB_S VLDRWQGB_U])
-(define_int_iterator VLD1Q [VLD1Q_S VLD1Q_U])
(define_int_iterator VLDRHGOQ [VLDRHQGO_S VLDRHQGO_U])
(define_int_iterator VLDRHGSOQ [VLDRHQGSO_S VLDRHQGSO_U])
-(define_int_iterator VLDRHQ [VLDRHQ_S VLDRHQ_U])
-(define_int_iterator VLDRWQ [VLDRWQ_S VLDRWQ_U])
(define_int_iterator VLDRDGBQ [VLDRDQGB_S VLDRDQGB_U])
(define_int_iterator VLDRDGOQ [VLDRDQGO_S VLDRDQGO_U])
(define_int_iterator VLDRDGSOQ [VLDRDQGSO_S VLDRDQGSO_U])
(define_int_iterator VLDRWGOQ [VLDRWQGO_S VLDRWQGO_U])
(define_int_iterator VLDRWGSOQ [VLDRWQGSO_S VLDRWQGSO_U])
-(define_int_iterator VST1Q [VST1Q_S VST1Q_U])
(define_int_iterator VSTRHSOQ [VSTRHQSO_S VSTRHQSO_U])
(define_int_iterator VSTRHSSOQ [VSTRHQSSO_S VSTRHQSSO_U])
-(define_int_iterator VSTRHQ [VSTRHQ_S VSTRHQ_U])
-(define_int_iterator VSTRWQ [VSTRWQ_S VSTRWQ_U])
(define_int_iterator VSTRDSBQ [VSTRDQSB_S VSTRDQSB_U])
(define_int_iterator VSTRDSOQ [VSTRDQSO_S VSTRDQSO_U])
(define_int_iterator VSTRDSSOQ [VSTRDQSSO_S VSTRDQSSO_U])
@@ -2927,19 +2958,25 @@
(define_int_iterator VLDRWGBWBQ [VLDRWQGBWB_S VLDRWQGBWB_U])
(define_int_iterator VSTRDSBWBQ [VSTRDQSBWB_S VSTRDQSBWB_U])
(define_int_iterator VLDRDGBWBQ [VLDRDQGBWB_S VLDRDQGBWB_U])
-(define_int_iterator VADCIQ [VADCIQ_U VADCIQ_S])
-(define_int_iterator VADCIQ_M [VADCIQ_M_U VADCIQ_M_S])
-(define_int_iterator VSBCQ [VSBCQ_U VSBCQ_S])
-(define_int_iterator VSBCQ_M [VSBCQ_M_U VSBCQ_M_S])
-(define_int_iterator VSBCIQ [VSBCIQ_U VSBCIQ_S])
-(define_int_iterator VSBCIQ_M [VSBCIQ_M_U VSBCIQ_M_S])
-(define_int_iterator VADCQ [VADCQ_U VADCQ_S])
-(define_int_iterator VADCQ_M [VADCQ_M_U VADCQ_M_S])
+(define_int_iterator VxCIQ [VADCIQ_U VADCIQ_S VSBCIQ_U VSBCIQ_S])
+(define_int_iterator VxCIQ_M [VADCIQ_M_U VADCIQ_M_S VSBCIQ_M_U VSBCIQ_M_S])
+(define_int_iterator VxCQ [VADCQ_U VADCQ_S VSBCQ_U VSBCQ_S])
+(define_int_iterator VxCQ_M [VADCQ_M_U VADCQ_M_S VSBCQ_M_U VSBCQ_M_S])
(define_int_iterator UQRSHLLQ [UQRSHLL_64 UQRSHLL_48])
(define_int_iterator SQRSHRLQ [SQRSHRL_64 SQRSHRL_48])
(define_int_iterator VSHLCQ_M [VSHLCQ_M_S VSHLCQ_M_U])
(define_int_iterator VQSHLUQ_M_N [VQSHLUQ_M_N_S])
(define_int_iterator VQSHLUQ_N [VQSHLUQ_N_S])
+(define_int_iterator VCVTxQ_F16_F32 [VCVTBQ_F16_F32 VCVTTQ_F16_F32])
+(define_int_iterator VCVTxQ_F32_F16 [VCVTBQ_F32_F16 VCVTTQ_F32_F16])
+(define_int_iterator VCVTxQ_M_F16_F32 [VCVTBQ_M_F16_F32 VCVTTQ_M_F16_F32])
+(define_int_iterator VCVTxQ_M_F32_F16 [VCVTBQ_M_F32_F16 VCVTTQ_M_F32_F16])
+(define_int_iterator VCVTxQ [VCVTAQ_S VCVTAQ_U VCVTMQ_S VCVTMQ_U VCVTNQ_S VCVTNQ_U VCVTPQ_S VCVTPQ_U])
+(define_int_iterator VCVTxQ_M [VCVTAQ_M_S VCVTAQ_M_U VCVTMQ_M_S VCVTMQ_M_U VCVTNQ_M_S VCVTNQ_M_U VCVTPQ_M_S VCVTPQ_M_U])
+(define_int_iterator VIDDUPQ [VIDUPQ VDDUPQ])
+(define_int_iterator VIDDUPQ_M [VIDUPQ_M VDDUPQ_M])
+(define_int_iterator VIDWDUPQ [VIWDUPQ VDWDUPQ])
+(define_int_iterator VIDWDUPQ_M [VIWDUPQ_M VDWDUPQ_M])
(define_int_iterator DLSTP [DLSTP8 DLSTP16 DLSTP32
DLSTP64])
(define_int_iterator LETP [LETP8 LETP16 LETP32
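The iterator changes above merge the per-intrinsic VADCIQ/VSBCIQ, VADCQ/VSBCQ and VCVT{A,M,N,P}Q iterators into the combined VxCIQ/VxCQ and VCVTxQ(_M) sets, add VIDDUPQ/VIDWDUPQ for the increment/decrement dups, and introduce the MVE_w_narrow_TYPE / MVE_wide_n_* attributes describing the narrow-in-memory, wide-in-register modes used by the new load/store patterns in mve.md. For reference, the carry-propagating intrinsics that now expand through a single pattern pair look like this at the source level (illustration only, not part of the patch):

#include <arm_mve.h>

int32x4_t add_with_carry_out (int32x4_t a, int32x4_t b, unsigned *carry)
{
  /* vadciq: 32-bit add with carry, initial carry 0; final carry written to *carry.  */
  return vadciq_s32 (a, b, carry);
}

uint32x4_t sub_with_carry (uint32x4_t a, uint32x4_t b, unsigned *carry)
{
  /* vsbcq: 32-bit subtract with carry; *carry supplies the initial carry and
     receives the final one.  */
  return vsbcq_u32 (a, b, carry);
}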
diff --git a/gcc/config/arm/linux-eabi.h b/gcc/config/arm/linux-eabi.h
index eef791f..b12e4ff 100644
--- a/gcc/config/arm/linux-eabi.h
+++ b/gcc/config/arm/linux-eabi.h
@@ -1,6 +1,6 @@
/* Configuration file for ARM GNU/Linux EABI targets.
Copyright (C) 2004-2024 Free Software Foundation, Inc.
- Contributed by CodeSourcery, LLC
+ Contributed by CodeSourcery, LLC
This file is part of GCC.
@@ -46,12 +46,15 @@
#undef TARGET_LINKER_EMULATION
#if TARGET_BIG_ENDIAN_DEFAULT
#define TARGET_LINKER_EMULATION "armelfb_linux_eabi"
+#define TARGET_FDPIC_LINKER_EMULATION "armelfb_linux_fdpiceabi"
#else
#define TARGET_LINKER_EMULATION "armelf_linux_eabi"
+#define TARGET_FDPIC_LINKER_EMULATION "armelf_linux_fdpiceabi"
#endif
#undef SUBTARGET_EXTRA_LINK_SPEC
-#define SUBTARGET_EXTRA_LINK_SPEC " -m " TARGET_LINKER_EMULATION
+#define SUBTARGET_EXTRA_LINK_SPEC " -m %{mfdpic: " \
+ TARGET_FDPIC_LINKER_EMULATION ";:" TARGET_LINKER_EMULATION "}"
/* GNU/Linux on ARM currently supports three dynamic linkers:
- ld-linux.so.2 - for the legacy ABI
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 706a45c..e54153e 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -94,13 +94,16 @@
(set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*")
(set_attr "neg_pool_range" "*,*,*,*,996,*,*,*")])
-(define_insn "mve_vdup<mode>"
- [(set (match_operand:MVE_vecs 0 "s_register_operand" "=w")
- (vec_duplicate:MVE_vecs
+;;
+;; [vdupq_n_u, vdupq_n_s, vdupq_n_f]
+;;
+(define_insn "@mve_vdupq_n<mode>"
+ [(set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
+ (vec_duplicate:MVE_VLD_ST
(match_operand:<V_elem> 1 "s_register_operand" "r")))]
"TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
"vdup.<V_sz_elem>\t%q0, %1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vdup<mode>"))
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vdupq_n<mode>"))
(set_attr "length" "4")
(set_attr "type" "mve_move")])
@@ -189,21 +192,6 @@
])
;;
-;; [vdupq_n_f])
-;;
-(define_insn "@mve_<mve_insn>q_n_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:<V_elem> 1 "s_register_operand" "r")]
- MVE_FP_N_VDUPQ_ONLY))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "<mve_insn>.%#<V_sz_elem>\t%q0, %1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
- (set_attr "type" "mve_move")
-])
-
-;;
;; [vrev32q_f])
;;
(define_insn "@mve_<mve_insn>q_f<mode>"
@@ -217,48 +205,35 @@
[(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f<mode>"))
(set_attr "type" "mve_move")
])
-;;
-;; [vcvttq_f32_f16])
-;;
-(define_insn "mve_vcvttq_f32_f16v4sf"
- [
- (set (match_operand:V4SF 0 "s_register_operand" "=w")
- (unspec:V4SF [(match_operand:V8HF 1 "s_register_operand" "w")]
- VCVTTQ_F32_F16))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvtt.f32.f16\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f32_f16v4sf"))
- (set_attr "type" "mve_move")
-])
;;
-;; [vcvtbq_f32_f16])
+;; [vcvtbq_f32_f16]
+;; [vcvttq_f32_f16]
;;
-(define_insn "mve_vcvtbq_f32_f16v4sf"
+(define_insn "@mve_<mve_insn>q_f32_f16v4sf"
[
(set (match_operand:V4SF 0 "s_register_operand" "=w")
(unspec:V4SF [(match_operand:V8HF 1 "s_register_operand" "w")]
- VCVTBQ_F32_F16))
+ VCVTxQ_F32_F16))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvtb.f32.f16\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f32_f16v4sf"))
+ "<mve_insn>.f32.f16\t%q0, %q1"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f32_f16v4sf"))
(set_attr "type" "mve_move")
])
;;
-;; [vcvtq_to_f_s, vcvtq_to_f_u])
+;; [vcvtq_to_f_s, vcvtq_to_f_u]
;;
-(define_insn "mve_vcvtq_to_f_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_to_f_<supf><mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
(unspec:MVE_0 [(match_operand:<MVE_CNVT> 1 "s_register_operand" "w")]
VCVTQ_TO_F))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_to_f_<supf><mode>"))
+ "<mve_insn>.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q1"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_to_f_<supf><mode>"))
(set_attr "type" "mve_move")
])
@@ -278,17 +253,17 @@
])
;;
-;; [vcvtq_from_f_s, vcvtq_from_f_u])
+;; [vcvtq_from_f_s, vcvtq_from_f_u]
;;
-(define_insn "mve_vcvtq_from_f_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_from_f_<supf><mode>"
[
(set (match_operand:MVE_5 0 "s_register_operand" "=w")
(unspec:MVE_5 [(match_operand:<MVE_CNVT> 1 "s_register_operand" "w")]
VCVTQ_FROM_F))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_from_f_<supf><mode>"))
+ "<mve_insn>.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_from_f_<supf><mode>"))
(set_attr "type" "mve_move")
])
@@ -329,21 +304,6 @@
)
;;
-;; [vdupq_n_u, vdupq_n_s])
-;;
-(define_insn "@mve_<mve_insn>q_n_<supf><mode>"
- [
- (set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:<V_elem> 1 "s_register_operand" "r")]
- VDUPQ_N))
- ]
- "TARGET_HAVE_MVE"
- "<mve_insn>.%#<V_sz_elem>\t%q0, %1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
- (set_attr "type" "mve_move")
-])
-
-;;
;; [vclzq_u, vclzq_s])
;;
(define_insn "@mve_vclzq_s<mode>"
@@ -429,62 +389,20 @@
])
;;
-;; [vcvtpq_s, vcvtpq_u])
-;;
-(define_insn "mve_vcvtpq_<supf><mode>"
- [
- (set (match_operand:MVE_5 0 "s_register_operand" "=w")
- (unspec:MVE_5 [(match_operand:<MVE_CNVT> 1 "s_register_operand" "w")]
- VCVTPQ))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvtp.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtpq_<supf><mode>"))
- (set_attr "type" "mve_move")
-])
-
-;;
-;; [vcvtnq_s, vcvtnq_u])
+;; [vcvtaq_u, vcvtaq_s]
+;; [vcvtmq_s, vcvtmq_u]
+;; [vcvtnq_s, vcvtnq_u]
+;; [vcvtpq_s, vcvtpq_u]
;;
-(define_insn "mve_vcvtnq_<supf><mode>"
- [
- (set (match_operand:MVE_5 0 "s_register_operand" "=w")
- (unspec:MVE_5 [(match_operand:<MVE_CNVT> 1 "s_register_operand" "w")]
- VCVTNQ))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvtn.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtnq_<supf><mode>"))
- (set_attr "type" "mve_move")
-])
-
-;;
-;; [vcvtmq_s, vcvtmq_u])
-;;
-(define_insn "mve_vcvtmq_<supf><mode>"
- [
- (set (match_operand:MVE_5 0 "s_register_operand" "=w")
- (unspec:MVE_5 [(match_operand:<MVE_CNVT> 1 "s_register_operand" "w")]
- VCVTMQ))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvtm.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtmq_<supf><mode>"))
- (set_attr "type" "mve_move")
-])
-
-;;
-;; [vcvtaq_u, vcvtaq_s])
-;;
-(define_insn "mve_vcvtaq_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_<supf><mode>"
[
(set (match_operand:MVE_5 0 "s_register_operand" "=w")
(unspec:MVE_5 [(match_operand:<MVE_CNVT> 1 "s_register_operand" "w")]
- VCVTAQ))
+ VCVTxQ))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvta.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtaq_<supf><mode>"))
+ "<mve_insn>.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q1"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
(set_attr "type" "mve_move")
])
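The four separate patterns for vcvta/vcvtm/vcvtn/vcvtp collapse above into a single @mve_<mve_insn>q_<supf><mode> insn driven by the new VCVTxQ iterator, with the rounding-mode mnemonic recovered through the mve_insn attribute. A user-level sketch of two of the float-to-integer conversions it covers (illustration only, not part of the patch):

#include <arm_mve.h>

int32x4_t to_int_nearest (float32x4_t v)
{
  return vcvtaq_s32_f32 (v);   /* vcvta: round to nearest, ties away from zero */
}

uint16x8_t to_uint_ceil (float16x8_t v)
{
  return vcvtpq_u16_f16 (v);   /* vcvtp: round toward +infinity */
}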
@@ -537,7 +455,7 @@
;;
;; [vctp8q vctp16q vctp32q vctp64q])
;;
-(define_insn "mve_vctp<MVE_vctp>q<MVE_vpred>"
+(define_insn "@mve_vctp<MVE_vctp>q<MVE_vpred>"
[
(set (match_operand:MVE_7 0 "vpr_register_operand" "=Up")
(unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")]
@@ -581,9 +499,9 @@
])
;;
-;; [vcvtq_n_to_f_s, vcvtq_n_to_f_u])
+;; [vcvtq_n_to_f_s, vcvtq_n_to_f_u]
;;
-(define_insn "mve_vcvtq_n_to_f_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_to_f_<supf><mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
(unspec:MVE_0 [(match_operand:<MVE_CNVT> 1 "s_register_operand" "w")
@@ -591,8 +509,8 @@
VCVTQ_N_TO_F))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvt.f<V_sz_elem>.<supf><V_sz_elem>\t%q0, %q1, %2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_to_f_<supf><mode>"))
+ "<mve_insn>.f<V_sz_elem>.<supf><V_sz_elem>\t%q0, %q1, %2"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_to_f_<supf><mode>"))
(set_attr "type" "mve_move")
])
@@ -679,9 +597,9 @@
])
;;
-;; [vcvtq_n_from_f_s, vcvtq_n_from_f_u])
+;; [vcvtq_n_from_f_s, vcvtq_n_from_f_u]
;;
-(define_insn "mve_vcvtq_n_from_f_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_n_from_f_<supf><mode>"
[
(set (match_operand:MVE_5 0 "s_register_operand" "=w")
(unspec:MVE_5 [(match_operand:<MVE_CNVT> 1 "s_register_operand" "w")
@@ -689,8 +607,8 @@
VCVTQ_N_FROM_F))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvt.<supf><V_sz_elem>.f<V_sz_elem>\t%q0, %q1, %2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_from_f_<supf><mode>"))
+ "<mve_insn>.<supf><V_sz_elem>.f<V_sz_elem>\t%q0, %q1, %2"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_from_f_<supf><mode>"))
(set_attr "type" "mve_move")
])
@@ -858,7 +776,7 @@
;;
;; [vbicq_s, vbicq_u])
;;
-(define_insn "mve_vbicq_u<mode>"
+(define_insn "@mve_vbicq_u<mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(and:MVE_2 (not:MVE_2 (match_operand:MVE_2 2 "s_register_operand" "w"))
@@ -870,7 +788,7 @@
(set_attr "type" "mve_move")
])
-(define_expand "mve_vbicq_s<mode>"
+(define_expand "@mve_vbicq_s<mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand")
(and:MVE_2 (not:MVE_2 (match_operand:MVE_2 2 "s_register_operand"))
@@ -1076,9 +994,9 @@
])
;;
-;; [vornq_u, vornq_s])
+;; [vornq_u, vornq_s]
;;
-(define_insn "mve_vornq_s<mode>"
+(define_insn "@mve_vornq_s<mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(ior:MVE_2 (not:MVE_2 (match_operand:MVE_2 2 "s_register_operand" "w"))
@@ -1090,7 +1008,7 @@
(set_attr "type" "mve_move")
])
-(define_expand "mve_vornq_u<mode>"
+(define_expand "@mve_vornq_u<mode>"
[
(set (match_operand:MVE_2 0 "s_register_operand")
(ior:MVE_2 (not:MVE_2 (match_operand:MVE_2 2 "s_register_operand"))
@@ -1264,7 +1182,7 @@
;;
;; [vbicq_f])
;;
-(define_insn "mve_vbicq_f<mode>"
+(define_insn "@mve_vbicq_f<mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
(and:MVE_0 (not:MVE_0 (match_operand:MVE_0 1 "s_register_operand" "w"))
@@ -1327,7 +1245,7 @@
;;
;; [vctp8q_m vctp16q_m vctp32q_m vctp64q_m])
;;
-(define_insn "mve_vctp<MVE_vctp>q_m<MVE_vpred>"
+(define_insn "@mve_vctp<MVE_vctp>q_m<MVE_vpred>"
[
(set (match_operand:MVE_7 0 "vpr_register_operand" "=Up")
(unspec:MVE_7 [(match_operand:SI 1 "s_register_operand" "r")
@@ -1342,34 +1260,19 @@
])
;;
-;; [vcvtbq_f16_f32])
+;; [vcvtbq_f16_f32]
+;; [vcvttq_f16_f32]
;;
-(define_insn "mve_vcvtbq_f16_f32v8hf"
+(define_insn "@mve_<mve_insn>q_f16_f32v8hf"
[
(set (match_operand:V8HF 0 "s_register_operand" "=w")
(unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "0")
(match_operand:V4SF 2 "s_register_operand" "w")]
- VCVTBQ_F16_F32))
+ VCVTxQ_F16_F32))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvtb.f16.f32\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f16_f32v8hf"))
- (set_attr "type" "mve_move")
-])
-
-;;
-;; [vcvttq_f16_f32])
-;;
-(define_insn "mve_vcvttq_f16_f32v8hf"
- [
- (set (match_operand:V8HF 0 "s_register_operand" "=w")
- (unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "0")
- (match_operand:V4SF 2 "s_register_operand" "w")]
- VCVTTQ_F16_F32))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vcvtt.f16.f32\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f16_f32v8hf"))
+ "<mve_insn>.f16.f32\t%q0, %q2"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f16_f32v8hf"))
(set_attr "type" "mve_move")
])
@@ -1499,9 +1402,9 @@
])
;;
-;; [vornq_f])
+;; [vornq_f]
;;
-(define_insn "mve_vornq_f<mode>"
+(define_insn "@mve_vornq_f<mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
(ior:MVE_0 (not:MVE_0 (match_operand:MVE_0 2 "s_register_operand" "w"))
@@ -1655,26 +1558,29 @@
(set_attr "length""8")])
;;
-;; [vcvtaq_m_u, vcvtaq_m_s])
+;; [vcvtaq_m_u, vcvtaq_m_s]
+;; [vcvtmq_m_s, vcvtmq_m_u]
+;; [vcvtnq_m_s, vcvtnq_m_u]
+;; [vcvtpq_m_u, vcvtpq_m_s]
;;
-(define_insn "mve_vcvtaq_m_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_<supf><mode>"
[
(set (match_operand:MVE_5 0 "s_register_operand" "=w")
(unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0")
(match_operand:<MVE_CNVT> 2 "s_register_operand" "w")
(match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
- VCVTAQ_M))
+ VCVTxQ_M))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvtat.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtaq_<supf><mode>"))
+ "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf><mode>"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
;;
-;; [vcvtq_m_to_f_s, vcvtq_m_to_f_u])
+;; [vcvtq_m_to_f_s, vcvtq_m_to_f_u]
;;
-(define_insn "mve_vcvtq_m_to_f_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_to_f_<supf><mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
@@ -1683,8 +1589,8 @@
VCVTQ_M_TO_F))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvtt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_to_f_<supf><mode>"))
+ "vpst\;<mve_insn>t.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q2"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_to_f_<supf><mode>"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
@@ -1758,35 +1664,7 @@
;;
;; [vshlcq_u vshlcq_s]
;;
-(define_expand "mve_vshlcq_vec_<supf><mode>"
- [(match_operand:MVE_2 0 "s_register_operand")
- (match_operand:MVE_2 1 "s_register_operand")
- (match_operand:SI 2 "s_register_operand")
- (match_operand:SI 3 "mve_imm_32")
- (unspec:MVE_2 [(const_int 0)] VSHLCQ)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_wb = gen_reg_rtx (SImode);
- emit_insn(gen_mve_vshlcq_<supf><mode>(operands[0], ignore_wb, operands[1],
- operands[2], operands[3]));
- DONE;
-})
-
-(define_expand "mve_vshlcq_carry_<supf><mode>"
- [(match_operand:SI 0 "s_register_operand")
- (match_operand:MVE_2 1 "s_register_operand")
- (match_operand:SI 2 "s_register_operand")
- (match_operand:SI 3 "mve_imm_32")
- (unspec:MVE_2 [(const_int 0)] VSHLCQ)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_vec = gen_reg_rtx (<MODE>mode);
- emit_insn(gen_mve_vshlcq_<supf><mode>(ignore_vec, operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
-})
-
-(define_insn "mve_vshlcq_<supf><mode>"
+(define_insn "@mve_vshlcq_<supf><mode>"
[(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
(match_operand:SI 3 "s_register_operand" "1")
@@ -1903,7 +1781,7 @@
]
"TARGET_HAVE_MVE"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_<supf><mode>"))
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n<mode>"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
@@ -2237,74 +2115,42 @@
(set_attr "length""8")])
;;
-;; [vcvtbq_m_f16_f32])
+;; [vcvtbq_m_f16_f32]
+;; [vcvttq_m_f16_f32]
;;
-(define_insn "mve_vcvtbq_m_f16_f32v8hf"
+(define_insn "@mve_<mve_insn>q_m_f16_f32v8hf"
[
(set (match_operand:V8HF 0 "s_register_operand" "=w")
(unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "0")
(match_operand:V4SF 2 "s_register_operand" "w")
- (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
- VCVTBQ_M_F16_F32))
+ (match_operand:V4BI 3 "vpr_register_operand" "Up")]
+ VCVTxQ_M_F16_F32))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvtbt.f16.f32\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f16_f32v8hf"))
+ "vpst\;<mve_insn>t.f16.f32\t%q0, %q2"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f16_f32v8hf"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
;;
-;; [vcvtbq_m_f32_f16])
+;; [vcvtbq_m_f32_f16]
+;; [vcvttq_m_f32_f16]
;;
-(define_insn "mve_vcvtbq_m_f32_f16v4sf"
+(define_insn "@mve_<mve_insn>q_m_f32_f16v4sf"
[
(set (match_operand:V4SF 0 "s_register_operand" "=w")
(unspec:V4SF [(match_operand:V4SF 1 "s_register_operand" "0")
(match_operand:V8HF 2 "s_register_operand" "w")
- (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
- VCVTBQ_M_F32_F16))
+ (match_operand:V8BI 3 "vpr_register_operand" "Up")]
+ VCVTxQ_M_F32_F16))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvtbt.f32.f16\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtbq_f32_f16v4sf"))
+ "vpst\;<mve_insn>t.f32.f16\t%q0, %q2"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_f32_f16v4sf"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
;;
-;; [vcvttq_m_f16_f32])
-;;
-(define_insn "mve_vcvttq_m_f16_f32v8hf"
- [
- (set (match_operand:V8HF 0 "s_register_operand" "=w")
- (unspec:V8HF [(match_operand:V8HF 1 "s_register_operand" "0")
- (match_operand:V4SF 2 "s_register_operand" "w")
- (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
- VCVTTQ_M_F16_F32))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvttt.f16.f32\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f16_f32v8hf"))
- (set_attr "type" "mve_move")
- (set_attr "length""8")])
-
-;;
-;; [vcvttq_m_f32_f16])
-;;
-(define_insn "mve_vcvttq_m_f32_f16v4sf"
- [
- (set (match_operand:V4SF 0 "s_register_operand" "=w")
- (unspec:V4SF [(match_operand:V4SF 1 "s_register_operand" "0")
- (match_operand:V8HF 2 "s_register_operand" "w")
- (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
- VCVTTQ_M_F32_F16))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvttt.f32.f16\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvttq_f32_f16v4sf"))
- (set_attr "type" "mve_move")
- (set_attr "length""8")])
-
-;;
;; [vdupq_m_n_f])
;;
(define_insn "@mve_<mve_insn>q_m_n_f<mode>"
@@ -2317,7 +2163,7 @@
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
"vpst\;<mve_insn>t.%#<V_sz_elem>\t%q0, %2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_f<mode>"))
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n<mode>"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
@@ -2599,61 +2445,11 @@
(set_attr "type" "mve_move")
(set_attr "length""8")])
-;;
-;; [vcvtmq_m_s, vcvtmq_m_u])
-;;
-(define_insn "mve_vcvtmq_m_<supf><mode>"
- [
- (set (match_operand:MVE_5 0 "s_register_operand" "=w")
- (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0")
- (match_operand:<MVE_CNVT> 2 "s_register_operand" "w")
- (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
- VCVTMQ_M))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvtmt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtmq_<supf><mode>"))
- (set_attr "type" "mve_move")
- (set_attr "length""8")])
-
-;;
-;; [vcvtpq_m_u, vcvtpq_m_s])
-;;
-(define_insn "mve_vcvtpq_m_<supf><mode>"
- [
- (set (match_operand:MVE_5 0 "s_register_operand" "=w")
- (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0")
- (match_operand:<MVE_CNVT> 2 "s_register_operand" "w")
- (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
- VCVTPQ_M))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvtpt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtpq_<supf><mode>"))
- (set_attr "type" "mve_move")
- (set_attr "length""8")])
;;
-;; [vcvtnq_m_s, vcvtnq_m_u])
+;; [vcvtq_m_n_from_f_s, vcvtq_m_n_from_f_u]
;;
-(define_insn "mve_vcvtnq_m_<supf><mode>"
- [
- (set (match_operand:MVE_5 0 "s_register_operand" "=w")
- (unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0")
- (match_operand:<MVE_CNVT> 2 "s_register_operand" "w")
- (match_operand:<MVE_VPRED> 3 "vpr_register_operand" "Up")]
- VCVTNQ_M))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvtnt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtnq_<supf><mode>"))
- (set_attr "type" "mve_move")
- (set_attr "length""8")])
-
-;;
-;; [vcvtq_m_n_from_f_s, vcvtq_m_n_from_f_u])
-;;
-(define_insn "mve_vcvtq_m_n_from_f_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_n_from_f_<supf><mode>"
[
(set (match_operand:MVE_5 0 "s_register_operand" "=w")
(unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0")
@@ -2663,8 +2459,8 @@
VCVTQ_M_N_FROM_F))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvtt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2, %3"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_from_f_<supf><mode>"))
+ "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2, %3"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_from_f_<supf><mode>"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
@@ -2686,9 +2482,9 @@
(set_attr "length""8")])
;;
-;; [vcvtq_m_from_f_u, vcvtq_m_from_f_s])
+;; [vcvtq_m_from_f_u, vcvtq_m_from_f_s]
;;
-(define_insn "mve_vcvtq_m_from_f_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_from_f_<supf><mode>"
[
(set (match_operand:MVE_5 0 "s_register_operand" "=w")
(unspec:MVE_5 [(match_operand:MVE_5 1 "s_register_operand" "0")
@@ -2697,8 +2493,8 @@
VCVTQ_M_FROM_F))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvtt.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_from_f_<supf><mode>"))
+ "vpst\;<mve_insn>t.<supf>%#<V_sz_elem>.f%#<V_sz_elem>\t%q0, %q2"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_from_f_<supf><mode>"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
@@ -2757,9 +2553,9 @@
(set_attr "length" "8")])
;;
-;; [vcvtq_m_n_to_f_u, vcvtq_m_n_to_f_s])
+;; [vcvtq_m_n_to_f_u, vcvtq_m_n_to_f_s]
;;
-(define_insn "mve_vcvtq_m_n_to_f_<supf><mode>"
+(define_insn "@mve_<mve_insn>q_m_n_to_f_<supf><mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
@@ -2769,8 +2565,8 @@
VCVTQ_M_N_TO_F))
]
"TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vcvtt.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vcvtq_n_to_f_<supf><mode>"))
+ "vpst\;<mve_insn>t.f%#<V_sz_elem>.<supf>%#<V_sz_elem>\t%q0, %q2, %3"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_n_to_f_<supf><mode>"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
@@ -2859,6 +2655,7 @@
;; [vandq_m_u, vandq_m_s]
;; [vbicq_m_u, vbicq_m_s]
;; [veorq_m_u, veorq_m_s]
+;; [vornq_m_u, vornq_m_s]
;; [vorrq_m_u, vorrq_m_s]
;;
(define_insn "@mve_<mve_insn>q_m_<supf><mode>"
@@ -2986,24 +2783,6 @@
(set_attr "length""8")])
;;
-;; [vornq_m_u, vornq_m_s])
-;;
-(define_insn "mve_vornq_m_<supf><mode>"
- [
- (set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
- (match_operand:MVE_2 2 "s_register_operand" "w")
- (match_operand:MVE_2 3 "s_register_operand" "w")
- (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
- VORNQ_M))
- ]
- "TARGET_HAVE_MVE"
- "vpst\;vornt\t%q0, %q2, %q3"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vornq_<supf><mode>"))
- (set_attr "type" "mve_move")
- (set_attr "length""8")])
-
-;;
;; [vqshlq_m_n_s, vqshlq_m_n_u]
;; [vshlq_m_n_s, vshlq_m_n_u]
;;
@@ -3257,6 +3036,7 @@
;; [vandq_m_f]
;; [vbicq_m_f]
;; [veorq_m_f]
+;; [vornq_m_f]
;; [vorrq_m_f]
;;
(define_insn "@mve_<mve_insn>q_m_f<mode>"
@@ -3336,44 +3116,201 @@
(set_attr "type" "mve_move")
(set_attr "length""8")])
-;;
-;; [vornq_m_f])
-;;
-(define_insn "mve_vornq_m_f<mode>"
- [
- (set (match_operand:MVE_0 0 "s_register_operand" "=w")
- (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "0")
- (match_operand:MVE_0 2 "s_register_operand" "w")
- (match_operand:MVE_0 3 "s_register_operand" "w")
- (match_operand:<MVE_VPRED> 4 "vpr_register_operand" "Up")]
- VORNQ_M_F))
+;; Vector stores
+;; [vstrbq_s8, vstrhq_s16, vstrwq_s32,
+;; vstrbq_u8, vstrhq_u16, vstrwq_u32,
+;; vst1q ]
+(define_insn "@mve_vstrq_<mode>"
+ [(set (match_operand:MVE_VLD_ST 0 "mve_memory_operand" "=Ux")
+ (unspec:MVE_VLD_ST
+ [(match_operand:MVE_VLD_ST 1 "s_register_operand" "w")]
+ VSTRQ))
]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
- "vpst\;vornt\t%q0, %q2, %q3"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vornq_f<mode>"))
- (set_attr "type" "mve_move")
- (set_attr "length""8")])
+ "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+ || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+{
+ rtx ops[2];
+ int regno = REGNO (operands[1]);
+ ops[1] = gen_rtx_REG (TImode, regno);
+ ops[0] = operands[0];
+ output_asm_insn ("vstr<MVE_elem_ch>.<V_sz_elem>\t%q1, %E0",ops);
+ return "";
+}
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_<mode>"))
+ (set_attr "length" "4")])
-;;
-;; [vstrbq_s vstrbq_u]
-;;
-(define_insn "mve_vstrbq_<supf><mode>"
- [(set (match_operand:<MVE_B_ELEM> 0 "mve_memory_operand" "=Ux")
- (unspec:<MVE_B_ELEM> [(match_operand:MVE_2 1 "s_register_operand" "w")]
- VSTRBQ))
+;; Predicated vector stores
+;; [vstrbq_p_s8, vstrhq_p_s16, vstrwq_p_s32,
+;; vstrbq_p_u8, vstrhq_p_u16, vstrwq_p_u32,
+;; vst1q_p ]
+(define_insn "@mve_vstrq_p_<mode>"
+ [(set (match_operand:MVE_VLD_ST 0 "mve_memory_operand" "=Ux")
+ (unspec:MVE_VLD_ST [
+ (match_operand:MVE_VLD_ST 1 "s_register_operand" "w")
+ (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")
+ (match_dup 0)
+ ] VSTRQ_P))
]
- "TARGET_HAVE_MVE"
+ "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+ || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
{
- rtx ops[2];
- int regno = REGNO (operands[1]);
- ops[1] = gen_rtx_REG (TImode, regno);
- ops[0] = operands[0];
- output_asm_insn("vstrb.<V_sz_elem>\t%q1, %E0",ops);
- return "";
+ rtx ops[2];
+ int regno = REGNO (operands[1]);
+ ops[1] = gen_rtx_REG (TImode, regno);
+ ops[0] = operands[0];
+ output_asm_insn ("vpst\;vstr<MVE_elem_ch>t.<V_sz_elem>\t%q1, %E0",ops);
+ return "";
+}
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrq_<mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length" "8")])
+
+;; Truncating vector stores
+;; [vstrbq_s16, vstrbq_s32, vstrhq_s32,
+;; vstrbq_u16, vstrbq_u32, vstrhq_u32]
+(define_insn "@mve_vstrq_truncate_<mode>"
+ [(set (match_operand:MVE_w_narrow_TYPE 0 "mve_memory_operand" "=Ux")
+ (unspec:MVE_w_narrow_TYPE
+ [(truncate:MVE_w_narrow_TYPE
+ (match_operand:<MVE_wide_n_TYPE> 1 "s_register_operand" "w"))]
+ VSTRQ_TRUNC
+ ))]
+ "TARGET_HAVE_MVE"
+{
+ rtx ops[2];
+ int regno = REGNO (operands[1]);
+ ops[1] = gen_rtx_REG (TImode, regno);
+ ops[0] = operands[0];
+ output_asm_insn ("vstr<MVE_elem_ch>.<MVE_wide_n_sz_elem>\t%q1, %E0",ops);
+ return "";
+}
+ [(set (attr "mve_unpredicated_insn")
+ (symbol_ref "CODE_FOR_mve_vstrq_truncate_<mode>"))
+ (set_attr "length" "4")])
+
+;; Predicated truncating vector stores
+;; [vstrbq_p_s16, vstrbq_p_s32, vstrhq_p_s32,
+;; vstrbq_p_u16, vstrbq_p_u32, vstrhq_p_u32]
+(define_insn "@mve_vstrq_p_truncate_<mode>"
+ [(set (match_operand:MVE_w_narrow_TYPE 0 "mve_memory_operand" "=Ux")
+ (unspec:MVE_w_narrow_TYPE [
+ (truncate:MVE_w_narrow_TYPE
+ (match_operand:<MVE_wide_n_TYPE> 1 "s_register_operand" "w"))
+ (match_operand:<MVE_wide_n_VPRED> 2 "vpr_register_operand" "Up")
+ (match_dup 0)
+ ] VSTRQ_TRUNC_P))]
+ "TARGET_HAVE_MVE"
+{
+ rtx ops[2];
+ int regno = REGNO (operands[1]);
+ ops[1] = gen_rtx_REG (TImode, regno);
+ ops[0] = operands[0];
+ output_asm_insn (
+ "vpst\;vstr<MVE_elem_ch>t.<MVE_wide_n_sz_elem>\t%q1, %E0",
+ ops
+ );
+ return "";
+}
+ [(set (attr "mve_unpredicated_insn")
+ (symbol_ref "CODE_FOR_mve_vstrq_truncate_<mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length" "8")])
+
+;; Vector Loads
+;; [vldrbq_s8, vldrhq_s16, vldrwq_s32,
+;; vldrbq_u8, vldrhq_u16, vldrwq_u32,
+;; vld1q ]
+(define_insn "@mve_vldrq_<mode>"
+ [(set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
+ (unspec:MVE_VLD_ST
+ [(match_operand:MVE_VLD_ST 1 "mve_memory_operand" "Ux")]
+ VLDRQ))]
+ "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+ || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+{
+ rtx ops[2];
+ int regno = REGNO (operands[0]);
+ ops[0] = gen_rtx_REG (TImode, regno);
+ ops[1] = operands[1];
+ output_asm_insn ("vldr<MVE_elem_ch>.<V_sz_elem>\t%q0, %E1",ops);
+ return "";
+ }
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_<mode>"))
+ (set_attr "length" "4")])
+
+;; Predicated vector loads
+;; [vldrbq_z_s8, vldrhq_z_s16, vldrwq_z_s32,
+;; vldrbq_z_u8, vldrhq_z_u16, vldrwq_z_u32,
+;; vld1q_z ]
+(define_insn "@mve_vldrq_z_<mode>"
+ [(set (match_operand:MVE_VLD_ST 0 "s_register_operand" "=w")
+ (unspec:MVE_VLD_ST [
+ (match_operand:MVE_VLD_ST 1 "mve_memory_operand" "Ux")
+ (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")
+ ] VLDRQ_Z))]
+ "(TARGET_HAVE_MVE && VALID_MVE_SI_MODE (<MODE>mode))
+ || (TARGET_HAVE_MVE_FLOAT && VALID_MVE_SF_MODE (<MODE>mode))"
+{
+ rtx ops[2];
+ int regno = REGNO (operands[0]);
+ ops[0] = gen_rtx_REG (TImode, regno);
+ ops[1] = operands[1];
+ output_asm_insn ("vpst\;vldr<MVE_elem_ch>t.<V_sz_elem>\t%q0, %E1",ops);
+ return "";
+}
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrq_<mode>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length" "8")])
+
+;; Extending vector loads
+;; [vldrbq_s16, vldrbq_s32, vldrhq_s32,
+;; vldrbq_u16, vldrbq_u32, vldrhq_u32]
+(define_insn "@mve_vldrq_extend_<mode><US>"
+ [(set (match_operand:<MVE_wide_n_TYPE> 0 "s_register_operand" "=w")
+ (unspec:<MVE_wide_n_TYPE>
+ [(SE:<MVE_wide_n_TYPE>
+ (match_operand:MVE_w_narrow_TYPE 1 "mve_memory_operand" "Ux"))]
+ VLDRQ_EXT))]
+ "TARGET_HAVE_MVE"
+{
+ rtx ops[2];
+ int regno = REGNO (operands[0]);
+ ops[0] = gen_rtx_REG (TImode, regno);
+ ops[1] = operands[1];
+ output_asm_insn ("vldr<MVE_elem_ch>.<US><MVE_wide_n_sz_elem>\t%q0, %E1",ops);
+ return "";
}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrbq_<supf><mode>"))
+ [(set (attr "mve_unpredicated_insn")
+ (symbol_ref "CODE_FOR_mve_vldrq_extend_<mode><US>"))
(set_attr "length" "4")])
+;; Predicated extending vector loads
+;; [vldrbq_z_s16, vldrbq_z_s32, vldrhq_z_s32,
+;; vldrbq_z_u16, vldrbq_z_u32, vldrhq_z_u32]
+(define_insn "@mve_vldrq_z_extend_<mode><US>"
+ [(set (match_operand:<MVE_wide_n_TYPE> 0 "s_register_operand" "=w")
+ (unspec:<MVE_wide_n_TYPE> [
+ (SE:<MVE_wide_n_TYPE>
+ (match_operand:MVE_w_narrow_TYPE 1 "mve_memory_operand" "Ux"))
+ (match_operand:<MVE_wide_n_VPRED> 2 "vpr_register_operand" "Up")
+ ] VLDRQ_EXT_Z))]
+ "TARGET_HAVE_MVE"
+{
+ rtx ops[2];
+ int regno = REGNO (operands[0]);
+ ops[0] = gen_rtx_REG (TImode, regno);
+ ops[1] = operands[1];
+ output_asm_insn (
+ "vpst\;vldr<MVE_elem_ch>t.<US><MVE_wide_n_sz_elem>\t%q0, %E1",
+ ops
+ );
+ return "";
+}
+ [(set (attr "mve_unpredicated_insn")
+ (symbol_ref "CODE_FOR_mve_vldrq_extend_<mode><US>"))
+ (set_attr "type" "mve_move")
+ (set_attr "length" "8")])
+
;;
;; [vstrbq_scatter_offset_s vstrbq_scatter_offset_u]
;;
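The block above adds the unified contiguous load/store patterns: @mve_vldrq/@mve_vstrq (plus the _z/_p forms) handle full-width accesses over MVE_VLD_ST, while @mve_vldrq_extend/@mve_vstrq_truncate use the MVE_w_narrow_TYPE modes with explicit sign/zero-extend and truncate RTL for the narrow-in-memory forms; the per-width vldrb/vldrh/vldrw and vstrb/vstrh/vstrw insns they replace are removed here and in the hunks below. A user-level sketch of intrinsics that map onto these patterns (illustration only, not part of the patch):

#include <arm_mve.h>

/* Full-width predicated copy of the first n bytes (n <= 16).  */
void copy_tail (uint8_t *dst, const uint8_t *src, uint32_t n)
{
  mve_pred16_t p = vctp8q (n);         /* enable the first n lanes  */
  uint8x16_t v = vld1q_z_u8 (src, p);  /* vldrbt.8 under a vpst     */
  vst1q_p_u8 (dst, v, p);              /* vstrbt.8 under a vpst     */
}

/* Widening load and truncating store: 8 x s8 in memory, s16 in registers.  */
void widen_then_narrow (int8_t *out, const int8_t *in)
{
  int16x8_t v = vldrbq_s16 (in);       /* vldrb.s16 */
  vstrbq_s16 (out, v);                 /* vstrb.16  */
}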
@@ -3451,29 +3388,6 @@
(set_attr "length" "4")])
;;
-;; [vldrbq_s vldrbq_u]
-;;
-(define_insn "mve_vldrbq_<supf><mode>"
- [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:<MVE_B_ELEM> 1 "mve_memory_operand" "Ux")]
- VLDRBQ))
- ]
- "TARGET_HAVE_MVE"
-{
- rtx ops[2];
- int regno = REGNO (operands[0]);
- ops[0] = gen_rtx_REG (TImode, regno);
- ops[1] = operands[1];
- if (<V_sz_elem> == 8)
- output_asm_insn ("vldrb.<V_sz_elem>\t%q0, %E1",ops);
- else
- output_asm_insn ("vldrb.<supf><V_sz_elem>\t%q0, %E1",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_<supf><mode>"))
- (set_attr "length" "4")])
-
-;;
;; [vldrwq_gather_base_s vldrwq_gather_base_u]
;;
(define_insn "mve_vldrwq_gather_base_<supf>v4si"
@@ -3551,25 +3465,6 @@
[(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_scatter_base_<supf>v4si"))
(set_attr "length" "8")])
-(define_insn "mve_vstrbq_p_<supf><mode>"
- [(set (match_operand:<MVE_B_ELEM> 0 "mve_memory_operand" "=Ux")
- (unspec:<MVE_B_ELEM>
- [(match_operand:MVE_2 1 "s_register_operand" "w")
- (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")
- (match_dup 0)]
- VSTRBQ))]
- "TARGET_HAVE_MVE"
-{
- rtx ops[2];
- int regno = REGNO (operands[1]);
- ops[1] = gen_rtx_REG (TImode, regno);
- ops[0] = operands[0];
- output_asm_insn ("vpst\;vstrbt.<V_sz_elem>\t%q1, %E0",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrbq_<supf><mode>"))
- (set_attr "length" "8")])
-
;;
;; [vldrbq_gather_offset_z_s vldrbq_gather_offset_z_u]
;;
@@ -3597,30 +3492,6 @@
(set_attr "length" "8")])
;;
-;; [vldrbq_z_s vldrbq_z_u]
-;;
-(define_insn "mve_vldrbq_z_<supf><mode>"
- [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:<MVE_B_ELEM> 1 "mve_memory_operand" "Ux")
- (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")]
- VLDRBQ))
- ]
- "TARGET_HAVE_MVE"
-{
- rtx ops[2];
- int regno = REGNO (operands[0]);
- ops[0] = gen_rtx_REG (TImode, regno);
- ops[1] = operands[1];
- if (<V_sz_elem> == 8)
- output_asm_insn ("vpst\;vldrbt.<V_sz_elem>\t%q0, %E1",ops);
- else
- output_asm_insn ("vpst\;vldrbt.<supf><V_sz_elem>\t%q0, %E1",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrbq_<supf><mode>"))
- (set_attr "length" "8")])
-
-;;
;; [vldrwq_gather_base_z_s vldrwq_gather_base_z_u]
;;
(define_insn "mve_vldrwq_gather_base_z_<supf>v4si"
@@ -3643,26 +3514,6 @@
(set_attr "length" "8")])
;;
-;; [vldrhq_f]
-;;
-(define_insn "mve_vldrhq_fv8hf"
- [(set (match_operand:V8HF 0 "s_register_operand" "=w")
- (unspec:V8HF [(match_operand:V8HI 1 "mve_memory_operand" "Ux")]
- VLDRHQ_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ops[2];
- int regno = REGNO (operands[0]);
- ops[0] = gen_rtx_REG (TImode, regno);
- ops[1] = operands[1];
- output_asm_insn ("vldrh.16\t%q0, %E1",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_fv8hf"))
- (set_attr "length" "4")])
-
-;;
;; [vldrhq_gather_offset_s vldrhq_gather_offset_u]
;;
(define_insn "mve_vldrhq_gather_offset_<supf><mode>"
@@ -3763,176 +3614,6 @@
(set_attr "length" "8")])
;;
-;; [vldrhq_s, vldrhq_u]
-;;
-(define_insn "mve_vldrhq_<supf><mode>"
- [(set (match_operand:MVE_5 0 "s_register_operand" "=w")
- (unspec:MVE_5 [(match_operand:<MVE_H_ELEM> 1 "mve_memory_operand" "Ux")]
- VLDRHQ))
- ]
- "TARGET_HAVE_MVE"
-{
- rtx ops[2];
- int regno = REGNO (operands[0]);
- ops[0] = gen_rtx_REG (TImode, regno);
- ops[1] = operands[1];
- if (<V_sz_elem> == 16)
- output_asm_insn ("vldrh.16\t%q0, %E1",ops);
- else
- output_asm_insn ("vldrh.<supf><V_sz_elem>\t%q0, %E1",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_<supf><mode>"))
- (set_attr "length" "4")])
-
-;;
-;; [vldrhq_z_f]
-;;
-(define_insn "mve_vldrhq_z_fv8hf"
- [(set (match_operand:V8HF 0 "s_register_operand" "=w")
- (unspec:V8HF [(match_operand:V8HI 1 "mve_memory_operand" "Ux")
- (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")]
- VLDRHQ_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ops[2];
- int regno = REGNO (operands[0]);
- ops[0] = gen_rtx_REG (TImode, regno);
- ops[1] = operands[1];
- output_asm_insn ("vpst\;vldrht.16\t%q0, %E1",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_fv8hf"))
- (set_attr "length" "8")])
-
-;;
-;; [vldrhq_z_s vldrhq_z_u]
-;;
-(define_insn "mve_vldrhq_z_<supf><mode>"
- [(set (match_operand:MVE_5 0 "s_register_operand" "=w")
- (unspec:MVE_5 [(match_operand:<MVE_H_ELEM> 1 "mve_memory_operand" "Ux")
- (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")]
- VLDRHQ))
- ]
- "TARGET_HAVE_MVE"
-{
- rtx ops[2];
- int regno = REGNO (operands[0]);
- ops[0] = gen_rtx_REG (TImode, regno);
- ops[1] = operands[1];
- if (<V_sz_elem> == 16)
- output_asm_insn ("vpst\;vldrht.16\t%q0, %E1",ops);
- else
- output_asm_insn ("vpst\;vldrht.<supf><V_sz_elem>\t%q0, %E1",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrhq_<supf><mode>"))
- (set_attr "length" "8")])
-
-;;
-;; [vldrwq_f]
-;;
-(define_insn "mve_vldrwq_fv4sf"
- [(set (match_operand:V4SF 0 "s_register_operand" "=w")
- (unspec:V4SF [(match_operand:V4SI 1 "mve_memory_operand" "Ux")]
- VLDRWQ_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ops[2];
- int regno = REGNO (operands[0]);
- ops[0] = gen_rtx_REG (TImode, regno);
- ops[1] = operands[1];
- output_asm_insn ("vldrw.32\t%q0, %E1",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_fv4sf"))
- (set_attr "length" "4")])
-
-;;
-;; [vldrwq_s vldrwq_u]
-;;
-(define_insn "mve_vldrwq_<supf>v4si"
- [(set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_operand:V4SI 1 "mve_memory_operand" "Ux")]
- VLDRWQ))
- ]
- "TARGET_HAVE_MVE"
-{
- rtx ops[2];
- int regno = REGNO (operands[0]);
- ops[0] = gen_rtx_REG (TImode, regno);
- ops[1] = operands[1];
- output_asm_insn ("vldrw.32\t%q0, %E1",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_<supf>v4si"))
- (set_attr "length" "4")])
-
-;;
-;; [vldrwq_z_f]
-;;
-(define_insn "mve_vldrwq_z_fv4sf"
- [(set (match_operand:V4SF 0 "s_register_operand" "=w")
- (unspec:V4SF [(match_operand:V4SI 1 "mve_memory_operand" "Ux")
- (match_operand:V4BI 2 "vpr_register_operand" "Up")]
- VLDRWQ_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ops[2];
- int regno = REGNO (operands[0]);
- ops[0] = gen_rtx_REG (TImode, regno);
- ops[1] = operands[1];
- output_asm_insn ("vpst\;vldrwt.32\t%q0, %E1",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_fv4sf"))
- (set_attr "length" "8")])
-
-;;
-;; [vldrwq_z_s vldrwq_z_u]
-;;
-(define_insn "mve_vldrwq_z_<supf>v4si"
- [(set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_operand:V4SI 1 "mve_memory_operand" "Ux")
- (match_operand:V4BI 2 "vpr_register_operand" "Up")]
- VLDRWQ))
- ]
- "TARGET_HAVE_MVE"
-{
- rtx ops[2];
- int regno = REGNO (operands[0]);
- ops[0] = gen_rtx_REG (TImode, regno);
- ops[1] = operands[1];
- output_asm_insn ("vpst\;vldrwt.32\t%q0, %E1",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrwq_<supf>v4si"))
- (set_attr "length" "8")])
-
-(define_expand "@mve_vld1q_f<mode>"
- [(match_operand:MVE_0 0 "s_register_operand")
- (unspec:MVE_0 [(match_operand:<MVE_CNVT> 1 "mve_memory_operand")] VLD1Q_F)
- ]
- "TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
-{
- emit_insn (gen_mve_vldr<V_sz_elem1>q_f<mode>(operands[0],operands[1]));
- DONE;
-})
-
-(define_expand "@mve_vld1q_<supf><mode>"
- [(match_operand:MVE_2 0 "s_register_operand")
- (unspec:MVE_2 [(match_operand:MVE_2 1 "mve_memory_operand")] VLD1Q)
- ]
- "TARGET_HAVE_MVE"
-{
- emit_insn (gen_mve_vldr<V_sz_elem1>q_<supf><mode>(operands[0],operands[1]));
- DONE;
-})
-
-;;
;; [vldrdq_gather_base_s vldrdq_gather_base_u]
;;
(define_insn "mve_vldrdq_gather_base_<supf>v2di"
@@ -4369,71 +4050,6 @@
(set_attr "length" "8")])
;;
-;; [vstrhq_f]
-;;
-(define_insn "mve_vstrhq_fv8hf"
- [(set (match_operand:V8HI 0 "mve_memory_operand" "=Ux")
- (unspec:V8HI [(match_operand:V8HF 1 "s_register_operand" "w")]
- VSTRHQ_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ops[2];
- int regno = REGNO (operands[1]);
- ops[1] = gen_rtx_REG (TImode, regno);
- ops[0] = operands[0];
- output_asm_insn ("vstrh.16\t%q1, %E0",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_fv8hf"))
- (set_attr "length" "4")])
-
-;;
-;; [vstrhq_p_f]
-;;
-(define_insn "mve_vstrhq_p_fv8hf"
- [(set (match_operand:V8HI 0 "mve_memory_operand" "=Ux")
- (unspec:V8HI
- [(match_operand:V8HF 1 "s_register_operand" "w")
- (match_operand:V8BI 2 "vpr_register_operand" "Up")
- (match_dup 0)]
- VSTRHQ_F))]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ops[2];
- int regno = REGNO (operands[1]);
- ops[1] = gen_rtx_REG (TImode, regno);
- ops[0] = operands[0];
- output_asm_insn ("vpst\;vstrht.16\t%q1, %E0",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_fv8hf"))
- (set_attr "length" "8")])
-
-;;
-;; [vstrhq_p_s vstrhq_p_u]
-;;
-(define_insn "mve_vstrhq_p_<supf><mode>"
- [(set (match_operand:<MVE_H_ELEM> 0 "mve_memory_operand" "=Ux")
- (unspec:<MVE_H_ELEM>
- [(match_operand:MVE_5 1 "s_register_operand" "w")
- (match_operand:<MVE_VPRED> 2 "vpr_register_operand" "Up")
- (match_dup 0)]
- VSTRHQ))
- ]
- "TARGET_HAVE_MVE"
-{
- rtx ops[2];
- int regno = REGNO (operands[1]);
- ops[1] = gen_rtx_REG (TImode, regno);
- ops[0] = operands[0];
- output_asm_insn ("vpst\;vstrht.<V_sz_elem>\t%q1, %E0",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_<supf><mode>"))
- (set_attr "length" "8")])
-
-;;
;; [vstrhq_scatter_offset_p_s vstrhq_scatter_offset_p_u]
;;
(define_expand "mve_vstrhq_scatter_offset_p_<supf><mode>"
@@ -4559,130 +4175,6 @@
(set_attr "length" "4")])
;;
-;; [vstrhq_s, vstrhq_u]
-;;
-(define_insn "mve_vstrhq_<supf><mode>"
- [(set (match_operand:<MVE_H_ELEM> 0 "mve_memory_operand" "=Ux")
- (unspec:<MVE_H_ELEM> [(match_operand:MVE_5 1 "s_register_operand" "w")]
- VSTRHQ))
- ]
- "TARGET_HAVE_MVE"
-{
- rtx ops[2];
- int regno = REGNO (operands[1]);
- ops[1] = gen_rtx_REG (TImode, regno);
- ops[0] = operands[0];
- output_asm_insn ("vstrh.<V_sz_elem>\t%q1, %E0",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrhq_<supf><mode>"))
- (set_attr "length" "4")])
-
-;;
-;; [vstrwq_f]
-;;
-(define_insn "mve_vstrwq_fv4sf"
- [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
- (unspec:V4SI [(match_operand:V4SF 1 "s_register_operand" "w")]
- VSTRWQ_F))
- ]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ops[2];
- int regno = REGNO (operands[1]);
- ops[1] = gen_rtx_REG (TImode, regno);
- ops[0] = operands[0];
- output_asm_insn ("vstrw.32\t%q1, %E0",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_fv4sf"))
- (set_attr "length" "4")])
-
-;;
-;; [vstrwq_p_f]
-;;
-(define_insn "mve_vstrwq_p_fv4sf"
- [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
- (unspec:V4SI
- [(match_operand:V4SF 1 "s_register_operand" "w")
- (match_operand:V4BI 2 "vpr_register_operand" "Up")
- (match_dup 0)]
- VSTRWQ_F))]
- "TARGET_HAVE_MVE && TARGET_HAVE_MVE_FLOAT"
-{
- rtx ops[2];
- int regno = REGNO (operands[1]);
- ops[1] = gen_rtx_REG (TImode, regno);
- ops[0] = operands[0];
- output_asm_insn ("vpst\;vstrwt.32\t%q1, %E0",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_fv4sf"))
- (set_attr "length" "8")])
-
-;;
-;; [vstrwq_p_s vstrwq_p_u]
-;;
-(define_insn "mve_vstrwq_p_<supf>v4si"
- [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
- (unspec:V4SI
- [(match_operand:V4SI 1 "s_register_operand" "w")
- (match_operand:V4BI 2 "vpr_register_operand" "Up")
- (match_dup 0)]
- VSTRWQ))]
- "TARGET_HAVE_MVE"
-{
- rtx ops[2];
- int regno = REGNO (operands[1]);
- ops[1] = gen_rtx_REG (TImode, regno);
- ops[0] = operands[0];
- output_asm_insn ("vpst\;vstrwt.32\t%q1, %E0",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_<supf>v4si"))
- (set_attr "length" "8")])
-
-;;
-;; [vstrwq_s vstrwq_u]
-;;
-(define_insn "mve_vstrwq_<supf>v4si"
- [(set (match_operand:V4SI 0 "mve_memory_operand" "=Ux")
- (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")]
- VSTRWQ))
- ]
- "TARGET_HAVE_MVE"
-{
- rtx ops[2];
- int regno = REGNO (operands[1]);
- ops[1] = gen_rtx_REG (TImode, regno);
- ops[0] = operands[0];
- output_asm_insn ("vstrw.32\t%q1, %E0",ops);
- return "";
-}
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vstrwq_<supf>v4si"))
- (set_attr "length" "4")])
-
-(define_expand "@mve_vst1q_f<mode>"
- [(match_operand:<MVE_CNVT> 0 "mve_memory_operand")
- (unspec:<MVE_CNVT> [(match_operand:MVE_0 1 "s_register_operand")] VST1Q_F)
- ]
- "TARGET_HAVE_MVE || TARGET_HAVE_MVE_FLOAT"
-{
- emit_insn (gen_mve_vstr<V_sz_elem1>q_f<mode>(operands[0],operands[1]));
- DONE;
-})
-
-(define_expand "@mve_vst1q_<supf><mode>"
- [(match_operand:MVE_2 0 "mve_memory_operand")
- (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand")] VST1Q)
- ]
- "TARGET_HAVE_MVE"
-{
- emit_insn (gen_mve_vstr<V_sz_elem1>q_<supf><mode>(operands[0],operands[1]));
- DONE;
-})
-
-;;
;; [vstrdq_scatter_base_p_s vstrdq_scatter_base_p_u]
;;
(define_insn "mve_vstrdq_scatter_base_p_<supf>v2di"
@@ -5271,258 +4763,79 @@
(set_attr "length" "4")])
;;
-;; [vidupq_n_u])
;;
-(define_expand "mve_vidupq_n_u<mode>"
- [(match_operand:MVE_2 0 "s_register_operand")
- (match_operand:SI 1 "s_register_operand")
- (match_operand:SI 2 "mve_imm_selective_upto_8")]
- "TARGET_HAVE_MVE"
-{
- rtx temp = gen_reg_rtx (SImode);
- emit_move_insn (temp, operands[1]);
- rtx inc = gen_int_mode (INTVAL(operands[2]) * <MVE_LANES>, SImode);
- emit_insn (gen_mve_vidupq_u<mode>_insn (operands[0], temp, operands[1],
- operands[2], inc));
- DONE;
-})
-
+;; [vddupq_u_insn, vidupq_u_insn]
;;
-;; [vidupq_u_insn])
-;;
-(define_insn "mve_vidupq_u<mode>_insn"
+(define_insn "@mve_<mve_insn>q_u<mode>_insn"
[(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:SI 2 "s_register_operand" "1")
(match_operand:SI 3 "mve_imm_selective_upto_8" "Rg")]
- VIDUPQ))
+ VIDDUPQ))
(set (match_operand:SI 1 "s_register_operand" "=Te")
- (plus:SI (match_dup 2)
- (match_operand:SI 4 "immediate_operand" "i")))]
+ (<viddupq_op>:SI (match_dup 2)
+ (match_operand:SI 4 "immediate_operand" "i")))]
"TARGET_HAVE_MVE"
- "vidup.u%#<V_sz_elem>\t%q0, %1, %3")
+ "<mve_insn>.u%#<V_sz_elem>\t%q0, %1, %3")
;;
-;; [vidupq_m_n_u])
+;; [vddupq_m_wb_u_insn, vidupq_m_wb_u_insn]
;;
-(define_expand "mve_vidupq_m_n_u<mode>"
- [(match_operand:MVE_2 0 "s_register_operand")
- (match_operand:MVE_2 1 "s_register_operand")
- (match_operand:SI 2 "s_register_operand")
- (match_operand:SI 3 "mve_imm_selective_upto_8")
- (match_operand:<MVE_VPRED> 4 "vpr_register_operand")]
- "TARGET_HAVE_MVE"
-{
- rtx temp = gen_reg_rtx (SImode);
- emit_move_insn (temp, operands[2]);
- rtx inc = gen_int_mode (INTVAL(operands[3]) * <MVE_LANES>, SImode);
- emit_insn (gen_mve_vidupq_m_wb_u<mode>_insn(operands[0], operands[1], temp,
- operands[2], operands[3],
- operands[4], inc));
- DONE;
-})
-
-;;
-;; [vidupq_m_wb_u_insn])
-;;
-(define_insn "mve_vidupq_m_wb_u<mode>_insn"
- [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
- (match_operand:SI 3 "s_register_operand" "2")
- (match_operand:SI 4 "mve_imm_selective_upto_8" "Rg")
- (match_operand:<MVE_VPRED> 5 "vpr_register_operand" "Up")]
- VIDUPQ_M))
- (set (match_operand:SI 2 "s_register_operand" "=Te")
- (plus:SI (match_dup 3)
- (match_operand:SI 6 "immediate_operand" "i")))]
- "TARGET_HAVE_MVE"
- "vpst\;\tvidupt.u%#<V_sz_elem>\t%q0, %2, %4"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vidupq_u<mode>_insn"))
- (set_attr "length""8")])
-
-;;
-;; [vddupq_n_u])
-;;
-(define_expand "mve_vddupq_n_u<mode>"
- [(match_operand:MVE_2 0 "s_register_operand")
- (match_operand:SI 1 "s_register_operand")
- (match_operand:SI 2 "mve_imm_selective_upto_8")]
- "TARGET_HAVE_MVE"
-{
- rtx temp = gen_reg_rtx (SImode);
- emit_move_insn (temp, operands[1]);
- rtx inc = gen_int_mode (INTVAL(operands[2]) * <MVE_LANES>, SImode);
- emit_insn (gen_mve_vddupq_u<mode>_insn (operands[0], temp, operands[1],
- operands[2], inc));
- DONE;
-})
-
-;;
-;; [vddupq_u_insn])
-;;
-(define_insn "mve_vddupq_u<mode>_insn"
- [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:SI 2 "s_register_operand" "1")
- (match_operand:SI 3 "immediate_operand" "i")]
- VDDUPQ))
- (set (match_operand:SI 1 "s_register_operand" "=Te")
- (minus:SI (match_dup 2)
- (match_operand:SI 4 "immediate_operand" "i")))]
- "TARGET_HAVE_MVE"
- "vddup.u%#<V_sz_elem>\t%q0, %1, %3")
-
-;;
-;; [vddupq_m_n_u])
-;;
-(define_expand "mve_vddupq_m_n_u<mode>"
- [(match_operand:MVE_2 0 "s_register_operand")
- (match_operand:MVE_2 1 "s_register_operand")
- (match_operand:SI 2 "s_register_operand")
- (match_operand:SI 3 "mve_imm_selective_upto_8")
- (match_operand:<MVE_VPRED> 4 "vpr_register_operand")]
- "TARGET_HAVE_MVE"
-{
- rtx temp = gen_reg_rtx (SImode);
- emit_move_insn (temp, operands[2]);
- rtx inc = gen_int_mode (INTVAL(operands[3]) * <MVE_LANES>, SImode);
- emit_insn (gen_mve_vddupq_m_wb_u<mode>_insn(operands[0], operands[1], temp,
- operands[2], operands[3],
- operands[4], inc));
- DONE;
-})
-
-;;
-;; [vddupq_m_wb_u_insn])
-;;
-(define_insn "mve_vddupq_m_wb_u<mode>_insn"
+(define_insn "@mve_<mve_insn>q_m_wb_u<mode>_insn"
[(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
(match_operand:SI 3 "s_register_operand" "2")
(match_operand:SI 4 "mve_imm_selective_upto_8" "Rg")
(match_operand:<MVE_VPRED> 5 "vpr_register_operand" "Up")]
- VDDUPQ_M))
+ VIDDUPQ_M))
(set (match_operand:SI 2 "s_register_operand" "=Te")
- (minus:SI (match_dup 3)
- (match_operand:SI 6 "immediate_operand" "i")))]
+ (<viddupq_m_op>:SI (match_dup 3)
+ (match_operand:SI 6 "immediate_operand" "i")))]
"TARGET_HAVE_MVE"
- "vpst\;vddupt.u%#<V_sz_elem>\t%q0, %2, %4"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vddupq_u<mode>_insn"))
+ "vpst\;<mve_insn>t.u%#<V_sz_elem>\t%q0, %2, %4"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_u<mode>_insn"))
(set_attr "length""8")])
;;
-;; [vdwdupq_n_u])
-;;
-(define_expand "mve_vdwdupq_n_u<mode>"
- [(match_operand:MVE_2 0 "s_register_operand")
- (match_operand:SI 1 "s_register_operand")
- (match_operand:DI 2 "s_register_operand")
- (match_operand:SI 3 "mve_imm_selective_upto_8")]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_wb = gen_reg_rtx (SImode);
- emit_insn (gen_mve_vdwdupq_wb_u<mode>_insn (operands[0], ignore_wb,
- operands[1], operands[2],
- operands[3]));
- DONE;
-})
-
-;;
-;; [vdwdupq_wb_u])
+;; [vdwdupq_wb_u_insn, viwdupq_wb_u_insn]
;;
-(define_expand "mve_vdwdupq_wb_u<mode>"
- [(match_operand:SI 0 "s_register_operand")
- (match_operand:SI 1 "s_register_operand")
- (match_operand:DI 2 "s_register_operand")
- (match_operand:SI 3 "mve_imm_selective_upto_8")
- (unspec:MVE_2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_vec = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_mve_vdwdupq_wb_u<mode>_insn (ignore_vec, operands[0],
- operands[1], operands[2],
- operands[3]));
- DONE;
-})
-
-;;
-;; [vdwdupq_wb_u_insn])
-;;
-(define_insn "mve_vdwdupq_wb_u<mode>_insn"
+(define_insn "@mve_<mve_insn>q_wb_u<mode>_insn"
[(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:SI 2 "s_register_operand" "1")
(subreg:SI (match_operand:DI 3 "s_register_operand" "r") 4)
(match_operand:SI 4 "mve_imm_selective_upto_8" "Rg")]
- VDWDUPQ))
+ VIDWDUPQ))
(set (match_operand:SI 1 "s_register_operand" "=Te")
(unspec:SI [(match_dup 2)
(subreg:SI (match_dup 3) 4)
(match_dup 4)]
- VDWDUPQ))]
+ VIDWDUPQ))]
"TARGET_HAVE_MVE"
- "vdwdup.u%#<V_sz_elem>\t%q0, %2, %R3, %4"
-)
-
-;;
-;; [vdwdupq_m_n_u])
-;;
-(define_expand "mve_vdwdupq_m_n_u<mode>"
- [(match_operand:MVE_2 0 "s_register_operand")
- (match_operand:MVE_2 1 "s_register_operand")
- (match_operand:SI 2 "s_register_operand")
- (match_operand:DI 3 "s_register_operand")
- (match_operand:SI 4 "mve_imm_selective_upto_8")
- (match_operand:<MVE_VPRED> 5 "vpr_register_operand")]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_wb = gen_reg_rtx (SImode);
- emit_insn (gen_mve_vdwdupq_m_wb_u<mode>_insn (operands[0], ignore_wb,
- operands[1], operands[2],
- operands[3], operands[4],
- operands[5]));
- DONE;
-})
-
-;;
-;; [vdwdupq_m_wb_u])
-;;
-(define_expand "mve_vdwdupq_m_wb_u<mode>"
- [(match_operand:SI 0 "s_register_operand")
- (match_operand:MVE_2 1 "s_register_operand")
- (match_operand:SI 2 "s_register_operand")
- (match_operand:DI 3 "s_register_operand")
- (match_operand:SI 4 "mve_imm_selective_upto_8")
- (match_operand:<MVE_VPRED> 5 "vpr_register_operand")]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_vec = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_mve_vdwdupq_m_wb_u<mode>_insn (ignore_vec, operands[0],
- operands[1], operands[2],
- operands[3], operands[4],
- operands[5]));
- DONE;
-})
+ "<mve_insn>.u%#<V_sz_elem>\t%q0, %2, %R3, %4"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_wb_u<mode>_insn"))
+ (set_attr "type" "mve_move")])
;;
-;; [vdwdupq_m_wb_u_insn])
+;; [vdwdupq_m_wb_u_insn, viwdupq_m_wb_u_insn]
;;
-(define_insn "mve_vdwdupq_m_wb_u<mode>_insn"
+(define_insn "@mve_<mve_insn>q_m_wb_u<mode>_insn"
[(set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
- (match_operand:SI 3 "s_register_operand" "1")
+ (unspec:MVE_2 [(match_operand:MVE_2 1 "s_register_operand" "0")
+ (match_operand:SI 3 "s_register_operand" "2")
(subreg:SI (match_operand:DI 4 "s_register_operand" "r") 4)
(match_operand:SI 5 "mve_imm_selective_upto_8" "Rg")
(match_operand:<MVE_VPRED> 6 "vpr_register_operand" "Up")]
- VDWDUPQ_M))
- (set (match_operand:SI 1 "s_register_operand" "=Te")
- (unspec:SI [(match_dup 2)
+ VIDWDUPQ_M))
+ (set (match_operand:SI 2 "s_register_operand" "=Te")
+ (unspec:SI [(match_dup 1)
(match_dup 3)
(subreg:SI (match_dup 4) 4)
(match_dup 5)
(match_dup 6)]
- VDWDUPQ_M))
+ VIDWDUPQ_M))
]
"TARGET_HAVE_MVE"
- "vpst\;vdwdupt.u%#<V_sz_elem>\t%q2, %3, %R4, %5"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vdwdupq_wb_u<mode>_insn"))
+ "vpst\;<mve_insn>t.u%#<V_sz_elem>\t%q1, %3, %R4, %5"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_wb_u<mode>_insn"))
(set_attr "type" "mve_move")
(set_attr "length""8")])
@@ -5562,24 +4875,6 @@
})
;;
-;; [viwdupq_wb_u_insn])
-;;
-(define_insn "mve_viwdupq_wb_u<mode>_insn"
- [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:SI 2 "s_register_operand" "1")
- (subreg:SI (match_operand:DI 3 "s_register_operand" "r") 4)
- (match_operand:SI 4 "mve_imm_selective_upto_8" "Rg")]
- VIWDUPQ))
- (set (match_operand:SI 1 "s_register_operand" "=Te")
- (unspec:SI [(match_dup 2)
- (subreg:SI (match_dup 3) 4)
- (match_dup 4)]
- VIWDUPQ))]
- "TARGET_HAVE_MVE"
- "viwdup.u%#<V_sz_elem>\t%q0, %2, %R3, %4"
-)
-
-;;
;; [viwdupq_m_n_u])
;;
(define_expand "mve_viwdupq_m_n_u<mode>"
@@ -5620,31 +4915,6 @@
})
;;
-;; [viwdupq_m_wb_u_insn])
-;;
-(define_insn "mve_viwdupq_m_wb_u<mode>_insn"
- [(set (match_operand:MVE_2 0 "s_register_operand" "=w")
- (unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
- (match_operand:SI 3 "s_register_operand" "1")
- (subreg:SI (match_operand:DI 4 "s_register_operand" "r") 4)
- (match_operand:SI 5 "mve_imm_selective_upto_8" "Rg")
- (match_operand:<MVE_VPRED> 6 "vpr_register_operand" "Up")]
- VIWDUPQ_M))
- (set (match_operand:SI 1 "s_register_operand" "=Te")
- (unspec:SI [(match_dup 2)
- (match_dup 3)
- (subreg:SI (match_dup 4) 4)
- (match_dup 5)
- (match_dup 6)]
- VIWDUPQ_M))
- ]
- "TARGET_HAVE_MVE"
- "vpst\;\tviwdupt.u%#<V_sz_elem>\t%q2, %3, %R4, %5"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_viwdupq_wb_u<mode>_insn"))
- (set_attr "type" "mve_move")
- (set_attr "length""8")])
-
-;;
;; [vstrwq_scatter_base_wb_s vstrwq_scatter_base_wb_u]
;;
(define_insn "mve_vstrwq_scatter_base_wb_<supf>v4si"
@@ -6150,160 +5420,89 @@
}
[(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vldrdq_gather_base_wb_<supf>v2di_insn"))
(set_attr "length" "8")])
-;;
-;; [vadciq_m_s, vadciq_m_u])
-;;
-(define_insn "mve_vadciq_m_<supf>v4si"
- [(set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "0")
- (match_operand:V4SI 2 "s_register_operand" "w")
- (match_operand:V4SI 3 "s_register_operand" "w")
- (match_operand:V4BI 4 "vpr_register_operand" "Up")]
- VADCIQ_M))
- (set (reg:SI VFPCC_REGNUM)
- (unspec:SI [(const_int 0)]
- VADCIQ_M))
- ]
- "TARGET_HAVE_MVE"
- "vpst\;vadcit.i32\t%q0, %q2, %q3"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadciq_<supf>v4si"))
- (set_attr "type" "mve_move")
- (set_attr "length" "8")])
;;
-;; [vadciq_u, vadciq_s])
+;; [vadciq_u, vadciq_s]
+;; [vsbciq_s, vsbciq_u]
;;
-(define_insn "mve_vadciq_<supf>v4si"
+(define_insn "@mve_<mve_insn>q_<supf>v4si"
[(set (match_operand:V4SI 0 "s_register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")
(match_operand:V4SI 2 "s_register_operand" "w")]
- VADCIQ))
+ VxCIQ))
(set (reg:SI VFPCC_REGNUM)
(unspec:SI [(const_int 0)]
- VADCIQ))
+ VxCIQ))
]
"TARGET_HAVE_MVE"
- "vadci.i32\t%q0, %q1, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadciq_<supf>v4si"))
+ "<mve_insn>.i32\t%q0, %q1, %q2"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
(set_attr "type" "mve_move")
(set_attr "length" "4")])
;;
-;; [vadcq_m_s, vadcq_m_u])
+;; [vadciq_m_s, vadciq_m_u]
+;; [vsbciq_m_u, vsbciq_m_s]
;;
-(define_insn "mve_vadcq_m_<supf>v4si"
+(define_insn "@mve_<mve_insn>q_m_<supf>v4si"
[(set (match_operand:V4SI 0 "s_register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "0")
(match_operand:V4SI 2 "s_register_operand" "w")
(match_operand:V4SI 3 "s_register_operand" "w")
(match_operand:V4BI 4 "vpr_register_operand" "Up")]
- VADCQ_M))
+ VxCIQ_M))
(set (reg:SI VFPCC_REGNUM)
- (unspec:SI [(reg:SI VFPCC_REGNUM)]
- VADCQ_M))
+ (unspec:SI [(const_int 0)]
+ VxCIQ_M))
]
"TARGET_HAVE_MVE"
- "vpst\;vadct.i32\t%q0, %q2, %q3"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadcq_<supf>v4si"))
+ "vpst\;<mve_insn>t.i32\t%q0, %q2, %q3"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
(set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
-;; [vadcq_u, vadcq_s])
+;; [vadcq_u, vadcq_s]
+;; [vsbcq_s, vsbcq_u]
;;
-(define_insn "mve_vadcq_<supf>v4si"
+(define_insn "@mve_<mve_insn>q_<supf>v4si"
[(set (match_operand:V4SI 0 "s_register_operand" "=w")
(unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")
(match_operand:V4SI 2 "s_register_operand" "w")]
- VADCQ))
+ VxCQ))
(set (reg:SI VFPCC_REGNUM)
(unspec:SI [(reg:SI VFPCC_REGNUM)]
- VADCQ))
+ VxCQ))
]
"TARGET_HAVE_MVE"
- "vadc.i32\t%q0, %q1, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vadcq_<supf>v4si"))
+ "<mve_insn>.i32\t%q0, %q1, %q2"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
(set_attr "type" "mve_move")
(set_attr "length" "4")
(set_attr "conds" "set")])
;;
-;; [vsbciq_m_u, vsbciq_m_s])
+;; [vadcq_m_s, vadcq_m_u]
+;; [vsbcq_m_u, vsbcq_m_s]
;;
-(define_insn "mve_vsbciq_m_<supf>v4si"
+(define_insn "@mve_<mve_insn>q_m_<supf>v4si"
[(set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")
- (match_operand:V4SI 2 "s_register_operand" "w")
- (match_operand:V4SI 3 "s_register_operand" "w")
- (match_operand:V4BI 4 "vpr_register_operand" "Up")]
- VSBCIQ_M))
- (set (reg:SI VFPCC_REGNUM)
- (unspec:SI [(const_int 0)]
- VSBCIQ_M))
- ]
- "TARGET_HAVE_MVE"
- "vpst\;vsbcit.i32\t%q0, %q2, %q3"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbciq_<supf>v4si"))
- (set_attr "type" "mve_move")
- (set_attr "length" "8")])
-
-;;
-;; [vsbciq_s, vsbciq_u])
-;;
-(define_insn "mve_vsbciq_<supf>v4si"
- [(set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")
- (match_operand:V4SI 2 "s_register_operand" "w")]
- VSBCIQ))
- (set (reg:SI VFPCC_REGNUM)
- (unspec:SI [(const_int 0)]
- VSBCIQ))
- ]
- "TARGET_HAVE_MVE"
- "vsbci.i32\t%q0, %q1, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbciq_<supf>v4si"))
- (set_attr "type" "mve_move")
- (set_attr "length" "4")])
-
-;;
-;; [vsbcq_m_u, vsbcq_m_s])
-;;
-(define_insn "mve_vsbcq_m_<supf>v4si"
- [(set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")
+ (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "0")
(match_operand:V4SI 2 "s_register_operand" "w")
(match_operand:V4SI 3 "s_register_operand" "w")
(match_operand:V4BI 4 "vpr_register_operand" "Up")]
- VSBCQ_M))
+ VxCQ_M))
(set (reg:SI VFPCC_REGNUM)
(unspec:SI [(reg:SI VFPCC_REGNUM)]
- VSBCQ_M))
+ VxCQ_M))
]
"TARGET_HAVE_MVE"
- "vpst\;vsbct.i32\t%q0, %q2, %q3"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbcq_<supf>v4si"))
+ "vpst\;<mve_insn>t.i32\t%q0, %q2, %q3"
+ [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_<mve_insn>q_<supf>v4si"))
(set_attr "type" "mve_move")
(set_attr "length" "8")])
;;
-;; [vsbcq_s, vsbcq_u])
-;;
-(define_insn "mve_vsbcq_<supf>v4si"
- [(set (match_operand:V4SI 0 "s_register_operand" "=w")
- (unspec:V4SI [(match_operand:V4SI 1 "s_register_operand" "w")
- (match_operand:V4SI 2 "s_register_operand" "w")]
- VSBCQ))
- (set (reg:SI VFPCC_REGNUM)
- (unspec:SI [(reg:SI VFPCC_REGNUM)]
- VSBCQ))
- ]
- "TARGET_HAVE_MVE"
- "vsbc.i32\t%q0, %q1, %q2"
- [(set (attr "mve_unpredicated_insn") (symbol_ref "CODE_FOR_mve_vsbcq_<supf>v4si"))
- (set_attr "type" "mve_move")
- (set_attr "length" "4")])
-
-;;
;; [vst2q])
;;
(define_insn "mve_vst2q<mode>"
@@ -6652,39 +5851,7 @@
;;
;; [vshlcq_m_u vshlcq_m_s]
;;
-(define_expand "mve_vshlcq_m_vec_<supf><mode>"
- [(match_operand:MVE_2 0 "s_register_operand")
- (match_operand:MVE_2 1 "s_register_operand")
- (match_operand:SI 2 "s_register_operand")
- (match_operand:SI 3 "mve_imm_32")
- (match_operand:<MVE_VPRED> 4 "vpr_register_operand")
- (unspec:MVE_2 [(const_int 0)] VSHLCQ_M)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_wb = gen_reg_rtx (SImode);
- emit_insn (gen_mve_vshlcq_m_<supf><mode> (operands[0], ignore_wb, operands[1],
- operands[2], operands[3],
- operands[4]));
- DONE;
-})
-
-(define_expand "mve_vshlcq_m_carry_<supf><mode>"
- [(match_operand:SI 0 "s_register_operand")
- (match_operand:MVE_2 1 "s_register_operand")
- (match_operand:SI 2 "s_register_operand")
- (match_operand:SI 3 "mve_imm_32")
- (match_operand:<MVE_VPRED> 4 "vpr_register_operand")
- (unspec:MVE_2 [(const_int 0)] VSHLCQ_M)]
- "TARGET_HAVE_MVE"
-{
- rtx ignore_vec = gen_reg_rtx (<MODE>mode);
- emit_insn (gen_mve_vshlcq_m_<supf><mode> (ignore_vec, operands[0],
- operands[1], operands[2],
- operands[3], operands[4]));
- DONE;
-})
-
-(define_insn "mve_vshlcq_m_<supf><mode>"
+(define_insn "@mve_vshlcq_m_<supf><mode>"
[(set (match_operand:MVE_2 0 "s_register_operand" "=w")
(unspec:MVE_2 [(match_operand:MVE_2 2 "s_register_operand" "0")
(match_operand:SI 3 "s_register_operand" "1")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index fa4a7ae..6892b7b 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -2989,7 +2989,7 @@
;; ...
;;
;; and so the vectorizer provides r, in which the result has to be accumulated.
-(define_insn "<sup>dot_prod<vsi2qi>"
+(define_insn "<sup>dot_prod<mode><vsi2qi>"
[(set (match_operand:VCVTI 0 "register_operand" "=w")
(plus:VCVTI
(unspec:VCVTI [(match_operand:<VSI2QI> 1 "register_operand" "w")
@@ -3002,7 +3002,7 @@
)
;; These instructions map to the __builtins for the Dot Product operations
-(define_expand "neon_<sup>dot<vsi2qi>"
+(define_expand "neon_<sup>dot<mode><vsi2qi>"
[(set (match_operand:VCVTI 0 "register_operand" "=w")
(plus:VCVTI
(unspec:VCVTI [(match_operand:<VSI2QI> 2 "register_operand")
@@ -3013,7 +3013,7 @@
)
;; These instructions map to the __builtins for the Dot Product operations.
-(define_insn "neon_usdot<vsi2qi>"
+(define_insn "neon_usdot<mode><vsi2qi>"
[(set (match_operand:VCVTI 0 "register_operand" "=w")
(plus:VCVTI
(unspec:VCVTI
@@ -3112,7 +3112,7 @@
)
;; Auto-vectorizer pattern for usdot
-(define_expand "usdot_prod<vsi2qi>"
+(define_expand "usdot_prod<mode><vsi2qi>"
[(set (match_operand:VCVTI 0 "register_operand")
(plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
"register_operand")
diff --git a/gcc/config/arm/symbian.h b/gcc/config/arm/symbian.h
index 6431d61..4bfe9c8 100644
--- a/gcc/config/arm/symbian.h
+++ b/gcc/config/arm/symbian.h
@@ -1,6 +1,6 @@
/* Configuration file for Symbian OS on ARM processors.
Copyright (C) 2004-2024 Free Software Foundation, Inc.
- Contributed by CodeSourcery, LLC
+ Contributed by CodeSourcery, LLC
This file is part of GCC.
@@ -29,7 +29,7 @@
Make all symbols hidden by default. Symbian OS expects that all
exported symbols will be explicitly marked with
- "__declspec(dllexport)".
+ "__declspec(dllexport)".
Enumeration types use 4 bytes, even if the enumerals are small,
unless explicitly overridden.
@@ -63,7 +63,7 @@
#undef SUBTARGET_ASM_FLOAT_SPEC
#define SUBTARGET_ASM_FLOAT_SPEC \
"%{!mfpu=*:-mfpu=vfp} %{!mcpu=*:%{!march=*:-march=armv5t}}"
-
+
/* Define the __symbian__ macro. */
#undef TARGET_OS_CPP_BUILTINS
#define TARGET_OS_CPP_BUILTINS() \
diff --git a/gcc/config/arm/unknown-elf.h b/gcc/config/arm/unknown-elf.h
index 6b31304..b1a1764 100644
--- a/gcc/config/arm/unknown-elf.h
+++ b/gcc/config/arm/unknown-elf.h
@@ -91,6 +91,6 @@
/* The libgcc udivmod functions may throw exceptions. If newlib is
configured to support long longs in I/O, then printf will depend on
udivmoddi4, which will depend on the exception unwind routines,
- which will depend on abort, which is defined in libc. */
+ which will depend on abort, which is defined in libc. */
#undef LINK_GCC_C_SEQUENCE_SPEC
#define LINK_GCC_C_SEQUENCE_SPEC "--start-group %G %{!nolibc:%L} --end-group"
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index f5f4d15..01963d5 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -1150,27 +1150,18 @@
VSTRWQSB_U
VSTRBQSO_S
VSTRBQSO_U
- VSTRBQ_S
- VSTRBQ_U
+ VLDRQ
+ VLDRQ_Z
+ VLDRQ_EXT
+ VLDRQ_EXT_Z
VLDRBQGO_S
VLDRBQGO_U
- VLDRBQ_S
- VLDRBQ_U
VLDRWQGB_S
VLDRWQGB_U
- VLD1Q_F
- VLD1Q_S
- VLD1Q_U
- VLDRHQ_F
VLDRHQGO_S
VLDRHQGO_U
VLDRHQGSO_S
VLDRHQGSO_U
- VLDRHQ_S
- VLDRHQ_U
- VLDRWQ_F
- VLDRWQ_S
- VLDRWQ_U
VLDRDQGB_S
VLDRDQGB_U
VLDRDQGO_S
@@ -1186,15 +1177,11 @@
VLDRWQGSO_F
VLDRWQGSO_S
VLDRWQGSO_U
- VSTRHQ_F
- VST1Q_S
- VST1Q_U
+ VSTRQ
+ VSTRQ_P
+ VSTRQ_TRUNC
+ VSTRQ_TRUNC_P
VSTRHQSO_S
- VSTRHQ_U
- VSTRWQ_S
- VSTRWQ_U
- VSTRWQ_F
- VST1Q_F
VSTRDQSB_S
VSTRDQSB_U
VSTRDQSO_S
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 773f556..3212d9c 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -367,7 +367,7 @@
case 8:
return \"vmov%?\\t%Q0, %R0, %P1\\t%@ int\";
case 9:
- if (TARGET_VFP_SINGLE || TARGET_HAVE_MVE)
+ if (TARGET_VFP_SINGLE && !TARGET_HAVE_MVE)
return \"vmov%?.f32\\t%0, %1\\t%@ int\;vmov%?.f32\\t%p0, %p1\\t%@ int\";
else
return \"vmov%?.f64\\t%P0, %P1\\t%@ int\";
@@ -385,7 +385,7 @@
(symbol_ref "arm_count_output_move_double_insns (operands) * 4")
(eq_attr "alternative" "9")
(if_then_else
- (match_test "TARGET_VFP_SINGLE")
+ (match_test "TARGET_VFP_SINGLE && !TARGET_HAVE_MVE")
(const_int 8)
(const_int 4))]
(const_int 4)))
@@ -744,7 +744,7 @@
case 6: case 7: case 9:
return output_move_double (operands, true, NULL);
case 8:
- if (TARGET_VFP_SINGLE)
+ if (TARGET_VFP_SINGLE && !TARGET_HAVE_MVE)
return \"vmov%?.f32\\t%0, %1\;vmov%?.f32\\t%p0, %p1\";
else
return \"vmov%?.f64\\t%P0, %P1\";
@@ -758,7 +758,7 @@
(set (attr "length") (cond [(eq_attr "alternative" "6,7,9") (const_int 8)
(eq_attr "alternative" "8")
(if_then_else
- (match_test "TARGET_VFP_SINGLE")
+ (match_test "TARGET_VFP_SINGLE && !TARGET_HAVE_MVE")
(const_int 8)
(const_int 4))]
(const_int 4)))
diff --git a/gcc/config/arm/vxworks.h b/gcc/config/arm/vxworks.h
index 7765d92..bafc650 100644
--- a/gcc/config/arm/vxworks.h
+++ b/gcc/config/arm/vxworks.h
@@ -1,10 +1,10 @@
/* Definitions of target machine for GCC,
- for ARM with targeting the VXWorks run time environment.
+ for ARM with targeting the VXWorks run time environment.
Copyright (C) 1999-2024 Free Software Foundation, Inc.
Contributed by: Mike Stump <mrs@wrs.com>
Brought up to date by CodeSourcery, LLC.
-
+
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
diff --git a/gcc/config/avr/avr-arch.h b/gcc/config/avr/avr-arch.h
index 69e8db1..072e44d 100644
--- a/gcc/config/avr/avr-arch.h
+++ b/gcc/config/avr/avr-arch.h
@@ -1,5 +1,4 @@
-/* Definitions of types that are used to store AVR architecture and
- device information.
+/* Device information for AVR 8-bit microcontrollers.
Copyright (C) 2012-2024 Free Software Foundation, Inc.
Contributed by Georg-Johann Lay (avr@gjlay.de)
@@ -14,7 +13,7 @@ GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
diff --git a/gcc/config/avr/avr-c.cc b/gcc/config/avr/avr-c.cc
index ca484f2..d3c40d7 100644
--- a/gcc/config/avr/avr-c.cc
+++ b/gcc/config/avr/avr-c.cc
@@ -1,4 +1,5 @@
-/* Copyright (C) 2009-2024 Free Software Foundation, Inc.
+/* Code for the C/C++ front end for AVR 8-bit microcontrollers.
+ Copyright (C) 2009-2024 Free Software Foundation, Inc.
Contributed by Anatoly Sokolov (aesok@post.ru)
This file is part of GCC.
@@ -7,12 +8,12 @@
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
-
+
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
@@ -50,7 +51,7 @@ static tree
avr_resolve_overloaded_builtin (unsigned int iloc, tree fndecl, void *vargs)
{
tree type0, type1, fold = NULL_TREE;
- enum avr_builtin_id id = AVR_BUILTIN_COUNT;
+ avr_builtin_id id = AVR_BUILTIN_COUNT;
location_t loc = (location_t) iloc;
vec<tree, va_gc> &args = * (vec<tree, va_gc>*) vargs;
@@ -290,7 +291,7 @@ avr_toupper (char *up, const char *lo)
/* Worker function for TARGET_CPU_CPP_BUILTINS. */
void
-avr_cpu_cpp_builtins (struct cpp_reader *pfile)
+avr_cpu_cpp_builtins (cpp_reader *pfile)
{
builtin_define_std ("AVR");
diff --git a/gcc/config/avr/avr-devices.cc b/gcc/config/avr/avr-devices.cc
index 456a6b7..9bb55dc 100644
--- a/gcc/config/avr/avr-devices.cc
+++ b/gcc/config/avr/avr-devices.cc
@@ -7,12 +7,12 @@
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
-
+
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
@@ -20,6 +20,7 @@
#ifndef IN_GEN_AVR_MMCU_TEXI
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/avr/avr-dimode.md b/gcc/config/avr/avr-dimode.md
index c357213..5933721 100644
--- a/gcc/config/avr/avr-dimode.md
+++ b/gcc/config/avr/avr-dimode.md
@@ -1,5 +1,4 @@
-;; Machine description for GNU compiler,
-;; for Atmel AVR micro controllers.
+;; Support 64-bit operations for AVR 8-bit microcontrollers.
;; Copyright (C) 1998-2024 Free Software Foundation, Inc.
;; Contributed by Georg Lay (avr@gjlay.de)
;;
@@ -460,11 +459,11 @@
(label_ref (match_operand 3))
(pc)))]
"avr_have_dimode"
- {
+ {
int icode = (int) GET_CODE (operands[0]);
targetm.canonicalize_comparison (&icode, &operands[1], &operands[2], false);
- operands[0] = gen_rtx_fmt_ee ((enum rtx_code) icode,
+ operands[0] = gen_rtx_fmt_ee ((rtx_code) icode,
VOIDmode, operands[1], operands[2]);
rtx acc_a = gen_rtx_REG (<MODE>mode, ACC_A);
@@ -489,7 +488,7 @@
emit_jump_insn (gen_cbranch_<mode>2_split (operands[0], operands[3]));
}
DONE;
- })
+ })
(define_insn_and_split "cbranch_<mode>2_split"
[(set (pc)
diff --git a/gcc/config/avr/avr-fixed.md b/gcc/config/avr/avr-fixed.md
index 911b8b2..eb83751a 100644
--- a/gcc/config/avr/avr-fixed.md
+++ b/gcc/config/avr/avr-fixed.md
@@ -1,5 +1,4 @@
-;; This file contains instructions that support fixed-point operations
-;; for Atmel AVR micro controllers.
+;; Support fixed-point operations for AVR 8-bit microcontrollers.
;; Copyright (C) 2012-2024 Free Software Foundation, Inc.
;;
;; Contributed by Sean D'Epagnier (sean@depagnier.com)
diff --git a/gcc/config/avr/avr-log.cc b/gcc/config/avr/avr-log.cc
index d702c5f..5708ac3 100644
--- a/gcc/config/avr/avr-log.cc
+++ b/gcc/config/avr/avr-log.cc
@@ -1,4 +1,4 @@
-/* Subroutines for log output for Atmel AVR back end.
+/* Subroutines for log output for AVR 8-bit microcontrollers.
Copyright (C) 2011-2024 Free Software Foundation, Inc.
Contributed by Georg-Johann Lay (avr@gjlay.de)
@@ -8,12 +8,12 @@
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
-
+
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
diff --git a/gcc/config/avr/avr-mcus.def b/gcc/config/avr/avr-mcus.def
index 068875a..f7401ab 100644
--- a/gcc/config/avr/avr-mcus.def
+++ b/gcc/config/avr/avr-mcus.def
@@ -1,4 +1,4 @@
-/* AVR MCUs.
+/* Information on supported AVR 8-bit microcontrollers.
Copyright (C) 2009-2024 Free Software Foundation, Inc.
This file is part of GCC.
diff --git a/gcc/config/avr/avr-modes.def b/gcc/config/avr/avr-modes.def
index e0633d6..e69636a 100644
--- a/gcc/config/avr/avr-modes.def
+++ b/gcc/config/avr/avr-modes.def
@@ -1,4 +1,5 @@
-/* Copyright (C) 2012-2024 Free Software Foundation, Inc.
+/* Extra machine modes for AVR 8-bit microcontrollers.
+ Copyright (C) 2012-2024 Free Software Foundation, Inc.
This file is part of GCC.
@@ -18,6 +19,12 @@
FRACTIONAL_INT_MODE (PSI, 24, 3);
+/* Used when the N (and Z) flag(s) of SREG are set.
+ The N flag indicates whether the value is negative.
+ The Z flag indicates whether the value is zero. */
+CC_MODE (CCN);
+CC_MODE (CCZN);
+
/* Make TA and UTA 64 bits wide.
128 bit wide modes would be insane on a 8-bit machine.
This needs special treatment in avr.cc and avr-lib.h. */
diff --git a/gcc/config/avr/avr-passes.cc b/gcc/config/avr/avr-passes.cc
new file mode 100644
index 0000000..205b490
--- /dev/null
+++ b/gcc/config/avr/avr-passes.cc
@@ -0,0 +1,1939 @@
+/* Support for avr-passes.def for AVR 8-bit microcontrollers.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#define IN_TARGET_CODE 1
+
+#define INCLUDE_VECTOR
+#include "config.h"
+#include "system.h"
+#include "intl.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "target.h"
+#include "rtl.h"
+#include "tree.h"
+#include "cfghooks.h"
+#include "cfganal.h"
+#include "df.h"
+#include "memmodel.h"
+#include "tm_p.h"
+#include "optabs.h"
+#include "regs.h"
+#include "emit-rtl.h"
+#include "recog.h"
+#include "explow.h"
+#include "cfgrtl.h"
+#include "context.h"
+#include "tree-pass.h"
+
+namespace
+{
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Try to replace 2 cbranch insns with 1 comparison and 2 branches.
+
+static const pass_data avr_pass_data_ifelse =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ TODO_df_finish | TODO_df_verify // todo_flags_finish
+};
+
+class avr_pass_ifelse : public rtl_opt_pass
+{
+public:
+ avr_pass_ifelse (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_ifelse, ctxt)
+ {
+ this->name = name;
+ }
+
+ bool gate (function *) final override
+ {
+ return optimize > 0;
+ }
+
+ unsigned int execute (function *func) final override;
+}; // avr_pass_ifelse
+
+
+/* Return TRUE iff comparison code CODE is explicitly signed. */
+
+static bool
+avr_strict_signed_p (rtx_code code)
+{
+ return code == GT || code == GE || code == LT || code == LE;
+}
+
+
+/* Return TRUE iff comparison code CODE is explicitly unsigned. */
+
+static bool
+avr_strict_unsigned_p (rtx_code code)
+{
+ return code == GTU || code == GEU || code == LTU || code == LEU;
+}
+
+#include "config/avr/ranges.h"
+
+/* Suppose the inputs represent a code like
+
+ if (x <CMP1> XVAL1) goto way1;
+ if (x <CMP2> XVAL2) goto way2;
+ way3:;
+
+ with two integer mode comparisons where XVAL1 and XVAL2 are CONST_INT.
+ When this can be rewritten in the form
+
+ if (x <cond1> xval) goto way1;
+ if (x <cond2> xval) goto way2;
+ way3:;
+
+ then set CMP1 = cond1, CMP2 = cond2, and return xval. Else return NULL_RTX.
+ When SWAPT is returned as true, then way1 and way2 must be swapped.
+ When the incoming SWAPT is false, the outgoing one will be false, too. */
+
+static rtx
+avr_2comparisons_rhs (rtx_code &cmp1, rtx xval1,
+ rtx_code &cmp2, rtx xval2,
+ machine_mode mode, bool &swapt)
+{
+ const bool may_swapt = swapt;
+ swapt = false;
+
+ //////////////////////////////////////////////////////////////////
+ // Step 0: Decide about signedness, map xval1/2 to the range
+ // of [un]signed machine mode.
+
+ const bool signed1_p = avr_strict_signed_p (cmp1);
+ const bool signed2_p = avr_strict_signed_p (cmp2);
+ const bool unsigned1_p = avr_strict_unsigned_p (cmp1);
+ const bool unsigned2_p = avr_strict_unsigned_p (cmp2);
+ const bool signed_p = signed1_p || signed2_p;
+ bool unsigned_p = unsigned1_p || unsigned2_p;
+
+ using T = Ranges::scalar_type;
+ T val1 = INTVAL (xval1);
+ T val2 = INTVAL (xval2);
+
+ if (signed_p + unsigned_p > 1)
+ {
+ // Don't go down that rabbit hole. When the RHSs are the
+ // same, we can still save one comparison.
+ return val1 == val2 ? xval1 : NULL_RTX;
+ }
+
+ // Decide about signedness. When no explicit signedness is present,
+ // then cases that are close to the unsigned boundary like EQ 0, EQ 1
+ // can also be optimized.
+ if (unsigned_p
+ || (! signed_p && IN_RANGE (val1, -2, 2)))
+ {
+ unsigned_p = true;
+ val1 = UINTVAL (xval1) & GET_MODE_MASK (mode);
+ val2 = UINTVAL (xval2) & GET_MODE_MASK (mode);
+ }
+
+ // No way we can decompose the domain in a usable manner when the
+ // RHSes are too far apart.
+ if (! IN_RANGE (val1 - val2, -2, 2))
+ return NULL_RTX;
+
+ //////////////////////////////////////////////////////////////////
+ // Step 1: Represent the input conditions as truth Ranges. This
+ // establishes a decomposition / coloring of the domain.
+
+ Ranges dom = Ranges::NBitsRanges (GET_MODE_BITSIZE (mode), unsigned_p,
+ Ranges::ALL);
+ Ranges r[4] = { dom, dom.truth (cmp1, val1), dom.truth (cmp2, val2), dom };
+
+ // r[1] shadows r[2] shadows r[3]. r[0] is just for nice indices.
+ r[3].minus (r[2]);
+ r[3].minus (r[1]);
+ r[2].minus (r[1]);
+
+ //////////////////////////////////////////////////////////////////
+ // Step 2: Filter for cases where the domain decomposes into three
+ // intervals: One to the left, one to the right, and one
+ // in the middle where the latter holds exactly one value.
+
+ for (int i = 1; i <= 3; ++i)
+ {
+ // Keep track of which Ranges is which.
+ r[i].tag = i;
+
+ gcc_assert (r[i].check ());
+
+ // Filter for proper intervals. Also return for the empty set,
+ // since cases where [m_min, m_max] decomposes into two intervals
+ // or less have been sorted out by the generic optimizers already,
+ // and hence should not be seen here. And more than two intervals
+ // at a time cannot be optimized of course.
+ if (r[i].size () != 1)
+ return NULL_RTX;
+ }
+
+ // Bubble-sort the three intervals such that:
+ // [1] is the left interval, i.e. the one taken by LT[U].
+ // [2] is the middle interval, i.e. the one taken by EQ.
+ // [3] is the right interval, i.e. the one taken by GT[U].
+ Ranges::sort2 (r[1], r[3]);
+ Ranges::sort2 (r[2], r[3]);
+ Ranges::sort2 (r[1], r[2]);
+
+ if (dump_file)
+ fprintf (dump_file,
+ ";; Decomposed: .%d=[%ld, %ld] .%d=[%ld, %ld] .%d=[%ld, %ld]\n",
+ r[1].tag, (long) r[1].ranges[0].lo, (long) r[1].ranges[0].hi,
+ r[2].tag, (long) r[2].ranges[0].lo, (long) r[2].ranges[0].hi,
+ r[3].tag, (long) r[3].ranges[0].lo, (long) r[3].ranges[0].hi);
+
+ // EQ / NE can handle only one value.
+ if (r[2].cardinality (0) != 1)
+ return NULL_RTX;
+
+ // Success! This is the sought for xval.
+ const T val = r[2].ranges[0].lo;
+
+ //////////////////////////////////////////////////////////////////
+ // Step 3: Work out which label gets which condition, trying to
+ // avoid the expensive codes GT[U] and LE[U] if possible.
+ // Avoiding expensive codes is always possible when labels
+ // way1 and way2 may be swapped.
+
+ // The xx1 ways have an expensive GT for cmp1 which can be avoided
+ // by swapping way1 with way2.
+ swapt = may_swapt && r[3].tag == 1;
+ if (swapt)
+ std::swap (r[3], r[2].tag == 2 ? r[2] : r[1]);
+
+ // 6 = 3! ways to assign LT, EQ, GT to the three labels.
+ const int way = 100 * r[1].tag + 10 * r[2].tag + r[3].tag;
+
+ if (dump_file)
+ fprintf (dump_file, ";; Success: unsigned=%d, swapt=%d, way=%d, rhs=%ld\n",
+ unsigned_p, swapt, way, (long) val);
+
+#define WAY(w, c1, c2) \
+ case w: \
+ cmp1 = unsigned_p ? unsigned_condition (c1) : c1; \
+ cmp2 = unsigned_p ? unsigned_condition (c2) : c2; \
+ break;
+
+ switch (way)
+ {
+ default:
+ gcc_unreachable();
+
+ // cmp1 gets the LT, avoid difficult branches for cmp2.
+ WAY (123, LT, EQ);
+ WAY (132, LT, NE);
+
+ // cmp1 gets the EQ, avoid difficult branches for cmp2.
+ WAY (213, EQ, LT);
+ WAY (312, EQ, GE);
+
+ // cmp1 gets the difficult GT, unavoidable as we may not swap way1/2.
+ WAY (231, GT, NE);
+ WAY (321, GT, EQ);
+ }
+
+#undef WAY
+
+ return gen_int_mode (val, mode);
+}
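
Editorial note: a concrete instance of the shape avr_2comparisons_rhs handles, written as hypothetical source code (illustrative only). Both branches test the same register against constants at most 2 apart, so a single compare against 1 suffices, followed by branches on LT and EQ:

  #include <stdint.h>

  int8_t
  classify (int8_t x)
  {
    if (x < 1)                /* way1 */
      return -1;
    if (x == 1)               /* way2 */
      return 0;
    return 1;                 /* way3 */
  }
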
+
+
+/* A helper for the next method. Suppose we have two conditional branches
+ with REG and CONST_INT operands
+
+ if (reg <cond1> xval1) goto label1;
+ if (reg <cond2> xval2) goto label2;
+
+ If the second comparison is redundant and there are codes <cmp1>
+ and <cmp2> such that the sequence can be performed as
+
+ REG_CC = compare (reg, xval);
+ if (REG_CC <cmp1> 0) goto label1;
+ if (REG_CC <cmp2> 0) goto label2;
+
+ then set COND1 to cmp1, COND2 to cmp2, SWAPT to true when the branch
+ targets have to be swapped, and return XVAL. Otherwise, return NULL_RTX.
+ This function may clobber COND1 and COND2 even when it returns NULL_RTX.
+
+ REVERSE_COND1 can be set to reverse condition COND1. This is useful
+ when the second comparison does not follow the first one, but is
+ located after label1 like in:
+
+ if (reg <cond1> xval1) goto label1;
+ ...
+ label1:
+ if (reg <cond2> xval2) goto label2;
+
+ In such a case we cannot swap the labels, and we may end up with a
+ difficult branch -- though one comparison can still be optimized out.
+ Getting rid of such difficult branches would require reordering blocks. */
+
+static rtx
+avr_redundant_compare (rtx xreg1, rtx_code &cond1, rtx xval1,
+ rtx xreg2, rtx_code &cond2, rtx xval2,
+ bool reverse_cond1, bool &swapt)
+{
+ // Make sure we have two REG <cond> CONST_INT comparisons with the same reg.
+ if (! rtx_equal_p (xreg1, xreg2)
+ || ! CONST_INT_P (xval1)
+ || ! CONST_INT_P (xval2))
+ return NULL_RTX;
+
+ if (reverse_cond1)
+ cond1 = reverse_condition (cond1);
+
+ // Allow swapping label1 <-> label2 only when ! reverse_cond1.
+ swapt = ! reverse_cond1;
+ rtx_code c1 = cond1;
+ rtx_code c2 = cond2;
+ rtx xval = avr_2comparisons_rhs (c1, xval1,
+ c2, xval2, GET_MODE (xreg1), swapt);
+ if (! xval)
+ return NULL_RTX;
+
+ if (dump_file)
+ {
+ rtx_code a1 = reverse_cond1 ? reverse_condition (cond1) : cond1;
+ rtx_code b1 = reverse_cond1 ? reverse_condition (c1) : c1;
+ const char *s_rev1 = reverse_cond1 ? " reverse_cond1" : "";
+ avr_dump (";; cond1: %C %r%s\n", a1, xval1, s_rev1);
+ avr_dump (";; cond2: %C %r\n", cond2, xval2);
+ avr_dump (";; => %C %d\n", b1, (int) INTVAL (xval));
+ avr_dump (";; => %C %d\n", c2, (int) INTVAL (xval));
+ }
+
+ cond1 = c1;
+ cond2 = c2;
+
+ return xval;
+}
+
+
+/* Similar to the function above, but assume that
+
+ if (xreg1 <cond1> xval1) goto label1;
+ if (xreg2 <cond2> xval2) goto label2;
+
+ are two subsequent REG-REG comparisons. When this can be represented as
+
+ REG_CC = compare (reg, xval);
+ if (REG_CC <cmp1> 0) goto label1;
+ if (REG_CC <cmp2> 0) goto label2;
+
+ then set XREG1 to reg, COND1 and COND2 accordingly, and return xval.
+ Otherwise, return NULL_RTX. This optimization can be performed
+ when { xreg1, xval1 } and { xreg2, xval2 } are equal as sets.
+ It can be done in such a way that no difficult branches occur. */
+
+static rtx
+avr_redundant_compare_regs (rtx &xreg1, rtx_code &cond1, rtx &xval1,
+ rtx &xreg2, rtx_code &cond2, rtx &xval2,
+ bool reverse_cond1)
+{
+ bool swapped;
+
+ if (! REG_P (xval1))
+ return NULL_RTX;
+ else if (rtx_equal_p (xreg1, xreg2)
+ && rtx_equal_p (xval1, xval2))
+ swapped = false;
+ else if (rtx_equal_p (xreg1, xval2)
+ && rtx_equal_p (xreg2, xval1))
+ swapped = true;
+ else
+ return NULL_RTX;
+
+ // Found a redundant REG-REG comparison. Assume that the incoming
+ // representation has been canonicalized by CANONICALIZE_COMPARISON.
+ // We can always represent this using only one comparison and in such
+ // a way that no difficult branches are required.
+
+ if (dump_file)
+ {
+ const char *s_rev1 = reverse_cond1 ? " reverse_cond1" : "";
+ avr_dump (";; %r %C %r%s\n", xreg1, cond1, xval1, s_rev1);
+ avr_dump (";; %r %C %r\n", xreg2, cond2, xval2);
+ }
+
+ if (reverse_cond1)
+ cond1 = reverse_condition (cond1);
+
+ if (swapped)
+ {
+ if (cond1 == EQ || cond1 == NE)
+ {
+ avr_dump (";; case #21\n");
+ std::swap (xreg1, xval1);
+ }
+ else
+ {
+ std::swap (xreg2, xval2);
+ cond2 = swap_condition (cond2);
+
+ // The swap may have introduced a difficult comparison.
+ // In order to get rid of it, only a few cases need extra care.
+ if ((cond1 == LT && cond2 == GT)
+ || (cond1 == LTU && cond2 == GTU))
+ {
+ avr_dump (";; case #22\n");
+ cond2 = NE;
+ }
+ else
+ avr_dump (";; case #23\n");
+ }
+ }
+ else
+ avr_dump (";; case #20\n");
+
+ return xval1;
+}
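
Editorial note: the REG-REG case is the analogous situation with two register operands. A hypothetical source-level example (illustrative only) where { xreg1, xval1 } and { xreg2, xval2 } are equal as sets, so one comparison of x with y can feed both branches:

  #include <stdint.h>

  int8_t
  order (int8_t x, int8_t y)
  {
    if (x < y)
      return -1;
    if (y < x)                /* same pair, operands swapped */
      return 1;
    return 0;
  }
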
+
+
+/* INSN1 and INSN2 are two cbranch insns for the same integer mode.
+ When FOLLOW_LABEL1 is false, then INSN2 is located in the fallthrough
+ path of INSN1. When FOLLOW_LABEL1 is true, then INSN2 is located at
+ the true edge of INSN1, INSN2 is preceded by a barrier, and no other
+ edge leads to the basic block of INSN2.
+
+ Try to replace INSN1 and INSN2 by a compare insn and two branch insns.
+ When such a replacement has been performed, then return the insn where the
+ caller should continue scanning the insn stream. Else, return nullptr. */
+
+static rtx_insn *
+avr_optimize_2ifelse (rtx_jump_insn *insn1,
+ rtx_jump_insn *insn2, bool follow_label1)
+{
+ avr_dump (";; Investigating jump_insn %d and jump_insn %d.\n",
+ INSN_UID (insn1), INSN_UID (insn2));
+
+ // Extract the operands of the insns:
+ // $0 = comparison operator ($1, $2)
+ // $1 = reg
+ // $2 = reg or const_int
+ // $3 = code_label
+ // $4 = optional SCRATCH for HI, PSI, SI cases.
+
+ const auto &op = recog_data.operand;
+
+ extract_insn (insn1);
+ rtx xop1[5] = { op[0], op[1], op[2], op[3], op[4] };
+ int n_operands = recog_data.n_operands;
+
+ extract_insn (insn2);
+ rtx xop2[5] = { op[0], op[1], op[2], op[3], op[4] };
+
+ rtx_code code1 = GET_CODE (xop1[0]);
+ rtx_code code2 = GET_CODE (xop2[0]);
+ bool swap_targets = false;
+
+ // Search redundant REG-REG comparison.
+ rtx xval = avr_redundant_compare_regs (xop1[1], code1, xop1[2],
+ xop2[1], code2, xop2[2],
+ follow_label1);
+
+ // Search redundant REG-CONST_INT comparison.
+ if (! xval)
+ xval = avr_redundant_compare (xop1[1], code1, xop1[2],
+ xop2[1], code2, xop2[2],
+ follow_label1, swap_targets);
+ if (! xval)
+ {
+ avr_dump (";; Nothing found for jump_insn %d and jump_insn %d.\n",
+ INSN_UID (insn1), INSN_UID (insn2));
+ return nullptr;
+ }
+
+ if (follow_label1)
+ code1 = reverse_condition (code1);
+
+ //////////////////////////////////////////////////////
+ // Found a replacement.
+
+ if (dump_file)
+ {
+ avr_dump (";; => %C %r\n", code1, xval);
+ avr_dump (";; => %C %r\n", code2, xval);
+
+ fprintf (dump_file, "\n;; Found chain of jump_insn %d and"
+ " jump_insn %d, follow_label1=%d:\n",
+ INSN_UID (insn1), INSN_UID (insn2), follow_label1);
+ print_rtl_single (dump_file, PATTERN (insn1));
+ print_rtl_single (dump_file, PATTERN (insn2));
+ }
+
+ rtx_insn *next_insn
+ = next_nonnote_nondebug_insn (follow_label1 ? insn1 : insn2);
+
+ // Pop the new branch conditions and the new comparison.
+ // Prematurely split into compare + branch so that we can drop
+ // the 2nd comparison. The following pass, split2, splits all
+ // insns for REG_CC, and it should still work as usual even when
+ // there are already some REG_CC insns around.
+
+ rtx xcond1 = gen_rtx_fmt_ee (code1, VOIDmode, cc_reg_rtx, const0_rtx);
+ rtx xcond2 = gen_rtx_fmt_ee (code2, VOIDmode, cc_reg_rtx, const0_rtx);
+ rtx xpat1 = gen_branch (xop1[3], xcond1);
+ rtx xpat2 = gen_branch (xop2[3], xcond2);
+ rtx xcompare = NULL_RTX;
+ machine_mode mode = GET_MODE (xop1[1]);
+
+ if (mode == QImode)
+ {
+ gcc_assert (n_operands == 4);
+ xcompare = gen_cmpqi3 (xop1[1], xval);
+ }
+ else
+ {
+ gcc_assert (n_operands == 5);
+ rtx scratch = GET_CODE (xop1[4]) == SCRATCH ? xop2[4] : xop1[4];
+ rtx (*gen_cmp)(rtx,rtx,rtx)
+ = mode == HImode ? gen_gen_comparehi
+ : mode == PSImode ? gen_gen_comparepsi
+ : gen_gen_comparesi; // SImode
+ xcompare = gen_cmp (xop1[1], xval, scratch);
+ }
+
+ // Emit that stuff.
+
+ rtx_insn *cmp = emit_insn_before (xcompare, insn1);
+ rtx_jump_insn *branch1 = emit_jump_insn_after (xpat1, insn1);
+ rtx_jump_insn *branch2 = emit_jump_insn_after (xpat2, insn2);
+
+ JUMP_LABEL (branch1) = xop1[3];
+ JUMP_LABEL (branch2) = xop2[3];
+ // delete_insn() decrements LABEL_NUSES when deleting a JUMP_INSN,
+ // but when we pop a new JUMP_INSN, do it by hand.
+ ++LABEL_NUSES (xop1[3]);
+ ++LABEL_NUSES (xop2[3]);
+
+ delete_insn (insn1);
+ delete_insn (insn2);
+
+ if (swap_targets)
+ {
+ gcc_assert (! follow_label1);
+
+ basic_block to1 = BLOCK_FOR_INSN (xop1[3]);
+ basic_block to2 = BLOCK_FOR_INSN (xop2[3]);
+ edge e1 = find_edge (BLOCK_FOR_INSN (branch1), to1);
+ edge e2 = find_edge (BLOCK_FOR_INSN (branch2), to2);
+ gcc_assert (e1);
+ gcc_assert (e2);
+ redirect_edge_and_branch (e1, to2);
+ redirect_edge_and_branch (e2, to1);
+ }
+
+ // As a side effect, also recog the new insns.
+ gcc_assert (valid_insn_p (cmp));
+ gcc_assert (valid_insn_p (branch1));
+ gcc_assert (valid_insn_p (branch2));
+
+ return next_insn;
+}
+
+
+/* Sequences like
+
+ SREG = compare (reg, 1 + val);
+ if (SREG >= 0) goto label1;
+ SREG = compare (reg, val);
+ if (SREG == 0) goto label2;
+
+ can be optimized to
+
+ SREG = compare (reg, val);
+ if (SREG == 0) goto label2;
+ if (SREG >= 0) goto label1;
+
+ Almost all cases where one of the comparisons is redundant can
+ be transformed in such a way that only one comparison is required
+ and no difficult branches are needed. */
+
+unsigned int
+avr_pass_ifelse::execute (function *)
+{
+ rtx_insn *next_insn;
+
+ for (rtx_insn *insn = get_insns(); insn; insn = next_insn)
+ {
+ next_insn = next_nonnote_nondebug_insn (insn);
+
+ if (! next_insn)
+ break;
+
+ // Search for two cbranch insns. The first one is a cbranch.
+ // Filter for "cbranch<mode>4_insn" with mode in QI, HI, PSI, SI.
+
+ if (! JUMP_P (insn))
+ continue;
+
+ int icode1 = recog_memoized (insn);
+
+ if (icode1 != CODE_FOR_cbranchqi4_insn
+ && icode1 != CODE_FOR_cbranchhi4_insn
+ && icode1 != CODE_FOR_cbranchpsi4_insn
+ && icode1 != CODE_FOR_cbranchsi4_insn)
+ continue;
+
+ rtx_jump_insn *insn1 = as_a<rtx_jump_insn *> (insn);
+
+ // jmp[0]: We can optimize cbranches that follow cbranch insn1.
+ rtx_insn *jmp[2] = { next_insn, nullptr };
+
+ // jmp[1]: A cbranch following the label of cbranch insn1.
+ if (LABEL_NUSES (JUMP_LABEL (insn1)) == 1)
+ {
+ rtx_insn *code_label1 = JUMP_LABEL_AS_INSN (insn1);
+ rtx_insn *barrier = prev_nonnote_nondebug_insn (code_label1);
+
+ // When the target label of insn1 is used exactly once and is
+ // not a fallthrough, i.e. is preceded by a barrier, then
+ // consider the insn following that label.
+ if (barrier && BARRIER_P (barrier))
+ jmp[1] = next_nonnote_nondebug_insn (code_label1);
+ }
+
+ // Almost certainly, only one of the two possible jumps can be
+ // optimized with insn1, but it's hard to tell which one a priori.
+ // Just try both. In the unlikely case where both could be optimized,
+ // prefer jmp[0] because eliminating difficult branches is impeded
+ // by following label1.
+
+ for (int j = 0; j < 2; ++j)
+ if (jmp[j] && JUMP_P (jmp[j])
+ && recog_memoized (jmp[j]) == icode1)
+ {
+ rtx_insn *next
+ = avr_optimize_2ifelse (insn1, as_a<rtx_jump_insn *> (jmp[j]),
+ j == 1 /* follow_label1 */);
+ if (next)
+ {
+ next_insn = next;
+ break;
+ }
+ }
+
+ } // loop insns
+
+ return 0;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Optimize results of the casesi expander for modes < SImode.
+
+static const pass_data avr_pass_data_casesi =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ 0 // todo_flags_finish
+};
+
+class avr_pass_casesi : public rtl_opt_pass
+{
+public:
+ avr_pass_casesi (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_casesi, ctxt)
+ {
+ this->name = name;
+ }
+
+ bool gate (function *) final override
+ {
+ return optimize > 0;
+ }
+
+ unsigned int execute (function *) final override;
+}; // avr_pass_casesi
+
+
+/* Make one parallel insn with all the patterns from insns i[0]..i[4]. */
+
+static rtx_insn *
+avr_parallel_insn_from_insns (rtx_insn *i[5])
+{
+ rtvec vec = gen_rtvec (5, PATTERN (i[0]), PATTERN (i[1]), PATTERN (i[2]),
+ PATTERN (i[3]), PATTERN (i[4]));
+ start_sequence();
+ emit (gen_rtx_PARALLEL (VOIDmode, vec));
+ rtx_insn *insn = get_insns();
+ end_sequence();
+
+ return insn;
+}
+
+
+/* Return true if we see an insn stream generated by casesi expander together
+ with an extension to SImode of the switch value.
+
+ If this is the case, fill in the insns from casesi to INSNS[1..4] and
+ the SImode extension to INSNS[0]. Moreover, extract the operands of
+ pattern casesi_<mode>_sequence forged from the sequence to recog_data. */
+
+static bool
+avr_is_casesi_sequence (basic_block bb, rtx_insn *insn, rtx_insn *insns[5])
+{
+ rtx set_4, set_0;
+
+ /* A first and quick test for a casesi sequence. As a side effect of
+ the test, harvest respective insns to INSNS[0..4]. */
+
+ if (!(JUMP_P (insns[4] = insn)
+ // casesi is the only insn that comes up with UNSPEC_INDEX_JMP,
+ // hence the following test ensures that we are actually dealing
+ // with code from casesi.
+ && (set_4 = single_set (insns[4]))
+ && UNSPEC == GET_CODE (SET_SRC (set_4))
+ && UNSPEC_INDEX_JMP == XINT (SET_SRC (set_4), 1)
+
+ && (insns[3] = prev_real_insn (insns[4]))
+ && (insns[2] = prev_real_insn (insns[3]))
+ && (insns[1] = prev_real_insn (insns[2]))
+
+ // Insn prior to casesi.
+ && (insns[0] = prev_real_insn (insns[1]))
+ && (set_0 = single_set (insns[0]))
+ && extend_operator (SET_SRC (set_0), SImode)))
+ {
+ return false;
+ }
+
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; Sequence from casesi in "
+ "[bb %d]:\n\n", bb->index);
+ for (int i = 0; i < 5; i++)
+ print_rtl_single (dump_file, insns[i]);
+ }
+
+ /* We have to deal with quite some operands. Extracting them by hand
+ would be tedious, therefore wrap the insn patterns into a parallel,
+ run recog against it and then use insn extract to get the operands. */
+
+ rtx_insn *xinsn = avr_parallel_insn_from_insns (insns);
+
+ INSN_CODE (xinsn) = recog (PATTERN (xinsn), xinsn, NULL /* num_clobbers */);
+
+ /* Failing to recognize means that someone changed the casesi expander or
+ that some passes prior to this one performed some unexpected changes.
+ Gracefully drop such situations instead of aborting. */
+
+ if (INSN_CODE (xinsn) < 0)
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; Sequence not recognized, giving up.\n\n");
+
+ return false;
+ }
+
+ gcc_assert (CODE_FOR_casesi_qi_sequence == INSN_CODE (xinsn)
+ || CODE_FOR_casesi_hi_sequence == INSN_CODE (xinsn));
+
+ extract_insn (xinsn);
+
+ // Assert on the anatomy of xinsn's operands we are going to work with.
+
+ gcc_assert (recog_data.n_operands == 11);
+ gcc_assert (recog_data.n_dups == 4);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; Operands extracted:\n");
+ for (int i = 0; i < recog_data.n_operands; i++)
+ avr_fdump (dump_file, ";; $%d = %r\n", i, recog_data.operand[i]);
+ fprintf (dump_file, "\n");
+ }
+
+ return true;
+}
+
+
+/* INSNS[1..4] is a sequence as generated by casesi and INSNS[0] is an
+ extension of an 8-bit or 16-bit integer to SImode. XOP contains the
+ operands of INSNS as extracted by insn_extract from pattern
+ casesi_<mode>_sequence:
+
+ $0: SImode reg switch value as result of $9.
+ $1: Negative of smallest index in switch.
+ $2: Number of entries in switch.
+ $3: Label to table.
+ $4: Label if out-of-bounds.
+ $5: $0 + $1.
+ $6: 3-byte PC: subreg:HI ($5) + label_ref ($3)
+ 2-byte PC: subreg:HI ($5)
+ $7: HI reg index into table (Z or pseudo)
+ $8: R24 or const0_rtx (to be clobbered)
+ $9: Extension to SImode of an 8-bit or 16-bit integer register $10.
+ $10: QImode or HImode register input of $9.
+
+ Try to optimize this sequence, i.e. use the original HImode / QImode
+ switch value instead of SImode. */
+
+static void
+avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
+{
+ // Original mode of the switch value; this is QImode or HImode.
+ machine_mode mode = GET_MODE (xop[10]);
+
+ // How the original switch value was extended to SImode; this is
+ // SIGN_EXTEND or ZERO_EXTEND.
+ rtx_code code = GET_CODE (xop[9]);
+
+ // Lower index, upper index (plus one) and range of case values.
+ HOST_WIDE_INT low_idx = -INTVAL (xop[1]);
+ HOST_WIDE_INT num_idx = INTVAL (xop[2]);
+ HOST_WIDE_INT hig_idx = low_idx + num_idx;
+
+ // Maximum ranges of (un)signed QImode resp. HImode.
+ unsigned umax = QImode == mode ? 0xff : 0xffff;
+ int imax = QImode == mode ? 0x7f : 0x7fff;
+ int imin = -imax - 1;
+
+ // Testing the case range and whether it fits into the range of the
+ // (un)signed mode. This test should actually always pass because it
+ // makes no sense to have case values outside the mode range. Notice
+ // that case labels which are unreachable because they are outside the
+ // mode of the switch value (e.g. "case -1" for uint8_t) have already
+ // been thrown away by the middle-end.
+
+ if (SIGN_EXTEND == code
+ && low_idx >= imin
+ && hig_idx <= imax)
+ {
+ // ok
+ }
+ else if (ZERO_EXTEND == code
+ && low_idx >= 0
+ && (unsigned) hig_idx <= umax)
+ {
+ // ok
+ }
+ else
+ {
+ if (dump_file)
+ fprintf (dump_file, ";; Case ranges too big, giving up.\n\n");
+ return;
+ }
+
+ // Do normalization of switch value $10 and out-of-bound check in its
+ // original mode instead of in SImode. Use a newly created pseudo.
+ // This will replace insns[1..2].
+
+ start_sequence();
+
+ rtx reg = copy_to_mode_reg (mode, xop[10]);
+
+ rtx (*gen_add)(rtx,rtx,rtx) = QImode == mode ? gen_addqi3 : gen_addhi3;
+ rtx (*gen_cbranch)(rtx,rtx,rtx,rtx)
+ = QImode == mode ? gen_cbranchqi4 : gen_cbranchhi4;
+
+ emit_insn (gen_add (reg, reg, gen_int_mode (-low_idx, mode)));
+ rtx op0 = reg; rtx op1 = gen_int_mode (num_idx, mode);
+ rtx labelref = copy_rtx (xop[4]);
+ rtx xbranch = gen_cbranch (gen_rtx_fmt_ee (GTU, VOIDmode, op0, op1),
+ op0, op1, labelref);
+ rtx_insn *cbranch = emit_jump_insn (xbranch);
+ JUMP_LABEL (cbranch) = xop[4];
+ ++LABEL_NUSES (xop[4]);
+
+ rtx_insn *seq1 = get_insns();
+ rtx_insn *last1 = get_last_insn();
+ end_sequence();
+
+ emit_insn_after (seq1, insns[2]);
+
+ // After the out-of-bounds test and corresponding branch, use a
+ // 16-bit index. If QImode is used, extend it to HImode first.
+ // This will replace insns[4].
+
+ start_sequence();
+
+ if (QImode == mode)
+ reg = force_reg (HImode, gen_rtx_fmt_e (code, HImode, reg));
+
+ rtx pat_4 = AVR_3_BYTE_PC
+ ? gen_movhi (xop[7], reg)
+ : gen_addhi3 (xop[7], reg, gen_rtx_LABEL_REF (VOIDmode, xop[3]));
+
+ emit_insn (pat_4);
+
+ rtx_insn *seq2 = get_insns();
+ rtx_insn *last2 = get_last_insn();
+ end_sequence();
+
+ emit_insn_after (seq2, insns[3]);
+
+ if (dump_file)
+ {
+ fprintf (dump_file, ";; New insns: ");
+
+ for (rtx_insn *insn = seq1; ; insn = NEXT_INSN (insn))
+ {
+ fprintf (dump_file, "%d, ", INSN_UID (insn));
+ if (insn == last1)
+ break;
+ }
+ for (rtx_insn *insn = seq2; ; insn = NEXT_INSN (insn))
+ {
+ fprintf (dump_file, "%d%s", INSN_UID (insn),
+ insn == last2 ? ".\n\n" : ", ");
+ if (insn == last2)
+ break;
+ }
+
+ fprintf (dump_file, ";; Deleting insns: %d, %d, %d.\n\n",
+ INSN_UID (insns[1]), INSN_UID (insns[2]), INSN_UID (insns[3]));
+ }
+
+ // Pseudo-delete the SImode and subreg of SImode insns. We don't care
+ // about the extension insns[0]: Its result is now unused and other
+ // passes will clean it up.
+
+ SET_INSN_DELETED (insns[1]);
+ SET_INSN_DELETED (insns[2]);
+ SET_INSN_DELETED (insns[3]);
+}
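+
+// A hedged illustration (not part of the original sources): the kind of C
+// source this optimization targets is a switch on a narrow value which the
+// middle-end expands through casesi, e.g.
+//
+//     #include <stdint.h>
+//
+//     int dispatch (uint8_t cmd)
+//     {
+//       switch (cmd)
+//         {
+//         case 10: return 1;
+//         case 11: return 2;
+//         case 12: return 3;
+//         case 13: return 4;
+//         case 14: return 5;
+//         default: return 0;
+//         }
+//     }
+//
+// casesi works on SImode, so CMD is first extended to 32 bits; provided a
+// jump table is actually emitted, the pass above redoes the normalization
+// and bounds check in QImode and only then widens the table index to HImode.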
+
+
+unsigned int
+avr_pass_casesi::execute (function *func)
+{
+ basic_block bb;
+
+ FOR_EACH_BB_FN (bb, func)
+ {
+ rtx_insn *insn, *insns[5];
+
+ FOR_BB_INSNS (bb, insn)
+ {
+ if (avr_is_casesi_sequence (bb, insn, insns))
+ {
+ avr_optimize_casesi (insns, recog_data.operand);
+ }
+ }
+ }
+
+ return 0;
+}
+
+} // anonymous namespace
+
+/* Perform some extra checks on operands of casesi_<mode>_sequence.
+ Not all operand dependencies can be described by means of predicates.
+ This function performs the leftover checks and should always return true.
+ Returning false means that someone changed the casesi expander but did
+ not adjust casesi_<mode>_sequence. */
+
+bool
+avr_casei_sequence_check_operands (rtx *xop)
+{
+ rtx sub_5 = NULL_RTX;
+
+ if (AVR_HAVE_EIJMP_EICALL
+ // The last clobber op of the tablejump.
+ && xop[8] == all_regs_rtx[REG_24])
+ {
+ // $6 is: (subreg:SI ($5) 0)
+ sub_5 = xop[6];
+ }
+
+ if (!AVR_HAVE_EIJMP_EICALL
+ // $6 is: (plus:HI (subreg:SI ($5) 0)
+ // (label_ref ($3)))
+ && PLUS == GET_CODE (xop[6])
+ && LABEL_REF == GET_CODE (XEXP (xop[6], 1))
+ && rtx_equal_p (xop[3], XEXP (XEXP (xop[6], 1), 0))
+ // The last clobber op of the tablejump.
+ && xop[8] == const0_rtx)
+ {
+ sub_5 = XEXP (xop[6], 0);
+ }
+
+ if (sub_5
+ && SUBREG_P (sub_5)
+ && SUBREG_BYTE (sub_5) == 0
+ && rtx_equal_p (xop[5], SUBREG_REG (sub_5)))
+ return true;
+
+ if (dump_file)
+ fprintf (dump_file, "\n;; Failed condition for casesi_<mode>_sequence\n\n");
+
+ return false;
+}
+
+namespace
+{
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Find more POST_INC and PRE_DEC cases.
+
+static const pass_data avr_pass_data_fuse_add =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_MACH_DEP, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ TODO_df_finish // todo_flags_finish
+};
+
+class avr_pass_fuse_add : public rtl_opt_pass
+{
+public:
+ avr_pass_fuse_add (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_fuse_add, ctxt)
+ {
+ this->name = name;
+ }
+
+ // Cloning is required because we are running one instance of the pass
+ // before peephole2, and a second one after cprop_hardreg.
+ opt_pass * clone () final override
+ {
+ return make_avr_pass_fuse_add (m_ctxt);
+ }
+
+ bool gate (function *) final override
+ {
+ return optimize && avr_fuse_add > 0;
+ }
+
+ unsigned int execute (function *) final override;
+
+ struct Some_Insn
+ {
+ rtx_insn *insn = nullptr;
+ rtx dest, src;
+ bool valid () const { return insn != nullptr; }
+ void set_deleted ()
+ {
+ gcc_assert (insn);
+ SET_INSN_DELETED (insn);
+ insn = nullptr;
+ }
+ };
+
+ // If .insn is not NULL, then this is a reg:HI += const_int
+ // of an address register.
+ struct Add_Insn : Some_Insn
+ {
+ rtx addend;
+ int regno;
+ Add_Insn () {}
+ Add_Insn (rtx_insn *insn);
+ };
+
+ // If .insn is not NULL, then this sets an address register
+ // to a constant value.
+ struct Ldi_Insn : Some_Insn
+ {
+ int regno;
+ Ldi_Insn () {}
+ Ldi_Insn (rtx_insn *insn);
+ };
+
+ // If .insn is not NULL, then this is a load or store insn where the
+ // address is REG or POST_INC with an address register.
+ struct Mem_Insn : Some_Insn
+ {
+ rtx reg_or_0, mem, addr, addr_reg;
+ int addr_regno;
+ rtx_code addr_code;
+ machine_mode mode;
+ addr_space_t addr_space;
+ bool store_p, volatile_p;
+ Mem_Insn () {}
+ Mem_Insn (rtx_insn *insn);
+ };
+
+ rtx_insn *fuse_ldi_add (Ldi_Insn &prev_ldi, Add_Insn &add);
+ rtx_insn *fuse_add_add (Add_Insn &prev_add, Add_Insn &add);
+ rtx_insn *fuse_add_mem (Add_Insn &prev_add, Mem_Insn &mem);
+ rtx_insn *fuse_mem_add (Mem_Insn &prev_mem, Add_Insn &add);
+}; // avr_pass_fuse_add
+
+
+/* Describe properties of AVR's indirect load and store instructions
+ LD, LDD, ST, STD, LPM, ELPM depending on register number, volatility etc.
+ Rules for "volatile" accesses are:
+
+ | Xmega | non-Xmega
+ ------+-----------------+----------------
+ load | read LSB first | read LSB first
+ store | write LSB first | write MSB first
+*/
+
+struct AVR_LdSt_Props
+{
+ bool has_postinc, has_predec, has_ldd;
+ // The insn printers will use POST_INC or PRE_DEC addressing, no matter
+ // what addressing modes we are feeding into them.
+ bool want_postinc, want_predec;
+
+ AVR_LdSt_Props (int regno, bool store_p, bool volatile_p, addr_space_t as)
+ {
+ bool generic_p = ADDR_SPACE_GENERIC_P (as);
+ bool flashx_p = ! generic_p && as != ADDR_SPACE_MEMX;
+ has_postinc = generic_p || (flashx_p && regno == REG_Z);
+ has_predec = generic_p;
+ has_ldd = ! AVR_TINY && generic_p && (regno == REG_Y || regno == REG_Z);
+ want_predec = volatile_p && generic_p && ! AVR_XMEGA && store_p;
+ want_postinc = volatile_p && generic_p && (AVR_XMEGA || ! store_p);
+ want_postinc |= flashx_p && regno == REG_Z;
+ }
+
+ AVR_LdSt_Props (const avr_pass_fuse_add::Mem_Insn &m)
+ : AVR_LdSt_Props (m.addr_regno, m.store_p, m.volatile_p, m.addr_space)
+ {
+ gcc_assert (m.valid ());
+ }
+};
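+
+// Illustration only (an assumption, not taken from the patch): the MSB-first
+// rule for non-Xmega volatile stores matters e.g. for 16-bit accesses through
+// a pointer, where classic AVR peripherals latch the low byte via a TEMP
+// register and expect the high byte to be written first:
+//
+//     #include <stdint.h>
+//
+//     void put16 (volatile uint16_t *p, uint16_t v)
+//     {
+//       // Non-Xmega: the two byte stores are emitted MSB first (the
+//       // want_predec case above); Xmega and plain loads go LSB first.
+//       *p = v;
+//     }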
+
+
+/* Emit a single_set that clobbers REG_CC. */
+
+static rtx_insn *
+emit_move_ccc (rtx dest, rtx src)
+{
+ return emit_insn (gen_gen_move_clobbercc (dest, src));
+}
+
+
+/* Emit a single_set that clobbers REG_CC after insn AFTER. */
+
+static rtx_insn *
+emit_move_ccc_after (rtx dest, rtx src, rtx_insn *after)
+{
+ return emit_insn_after (gen_gen_move_clobbercc (dest, src), after);
+}
+
+static bool
+reg_seen_between_p (const_rtx reg, const rtx_insn *from, const rtx_insn *to)
+{
+ return (reg_used_between_p (reg, from, to)
+ || reg_set_between_p (reg, from, to));
+}
+
+
+static void
+avr_maybe_adjust_cfa (rtx_insn *insn, rtx reg, int addend)
+{
+ if (addend
+ && frame_pointer_needed
+ && REGNO (reg) == FRAME_POINTER_REGNUM
+ && avr_fuse_add == 3)
+ {
+ rtx plus = plus_constant (Pmode, reg, addend);
+ RTX_FRAME_RELATED_P (insn) = 1;
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (reg, plus));
+ }
+}
+
+
+// If successful, this represents a SET of a pointer register to a constant.
+avr_pass_fuse_add::Ldi_Insn::Ldi_Insn (rtx_insn *insn)
+{
+ rtx set = single_set (insn);
+ if (!set)
+ return;
+
+ src = SET_SRC (set);
+ dest = SET_DEST (set);
+
+ if (REG_P (dest)
+ && GET_MODE (dest) == Pmode
+ && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z)
+ && CONSTANT_P (src))
+ {
+ this->insn = insn;
+ }
+}
+
+// If successful, this represents a PLUS with CONST_INT of a pointer
+// register X, Y or Z. Otherwise, the object is not valid().
+avr_pass_fuse_add::Add_Insn::Add_Insn (rtx_insn *insn)
+{
+ rtx set = single_set (insn);
+ if (!set)
+ return;
+
+ src = SET_SRC (set);
+ dest = SET_DEST (set);
+ if (REG_P (dest)
+ // We are only interested in PLUSes that change address regs.
+ && GET_MODE (dest) == Pmode
+ && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z)
+ && PLUS == GET_CODE (src)
+ && rtx_equal_p (XEXP (src, 0), dest)
+ && CONST_INT_P (XEXP (src, 1)))
+ {
+ // This is reg:HI += const_int.
+ addend = XEXP (src, 1);
+ this->insn = insn;
+ }
+}
+
+// If successful, this represents a load or store insn where the addressing
+// mode uses pointer register X, Y or Z. Otherwise, the object is not valid().
+avr_pass_fuse_add::Mem_Insn::Mem_Insn (rtx_insn *insn)
+{
+ rtx set = single_set (insn);
+ if (!set)
+ return;
+
+ src = SET_SRC (set);
+ dest = SET_DEST (set);
+ mode = GET_MODE (dest);
+
+ if (MEM_P (dest)
+ && (REG_P (src) || src == CONST0_RTX (mode)))
+ {
+ reg_or_0 = src;
+ mem = dest;
+ }
+ else if (REG_P (dest) && MEM_P (src))
+ {
+ reg_or_0 = dest;
+ mem = src;
+ }
+ else
+ return;
+
+ if (avr_mem_memx_p (mem)
+ || avr_load_libgcc_p (mem))
+ return;
+
+ addr = XEXP (mem, 0);
+ addr_code = GET_CODE (addr);
+
+ if (addr_code == REG)
+ addr_reg = addr;
+ else if (addr_code == POST_INC || addr_code == PRE_DEC)
+ addr_reg = XEXP (addr, 0);
+ else
+ return;
+
+ addr_regno = REGNO (addr_reg);
+
+ if (avr_fuse_add == 2
+ && frame_pointer_needed
+ && addr_regno == FRAME_POINTER_REGNUM)
+ MEM_VOLATILE_P (mem) = 0;
+
+ if (reg_overlap_mentioned_p (reg_or_0, addr) // Can handle CONSTANT_P.
+ || addr_regno > REG_Z
+ || avr_mem_memx_p (mem)
+ // The following optimizations only handle REG and POST_INC,
+ // so that's all we allow here.
+ || (addr_code != REG && addr_code != POST_INC))
+ return;
+
+ addr_space = MEM_ADDR_SPACE (mem);
+ volatile_p = MEM_VOLATILE_P (mem);
+ store_p = MEM_P (dest);
+
+ // Mark this object as valid ().
+ this->insn = insn;
+}
+
+/* Try to combine a Ldi insn with a PLUS CONST_INT addend to one Ldi insn.
+ If LDI is valid, then it precedes ADD in the same block.
+ When a replacement is found, a new insn is emitted and the old insns
+ are pseudo-deleted. The returned insn is the point where the calling
+ scanner should continue. When no replacement is found, nullptr is
+ returned and nothing changed. */
+
+rtx_insn *
+avr_pass_fuse_add::fuse_ldi_add (Ldi_Insn &ldi, Add_Insn &add)
+{
+ if (! ldi.valid ()
+ || reg_seen_between_p (ldi.dest, ldi.insn, add.insn))
+ {
+ // If something is between the Ldi and the current insn, we can
+ // set the Ldi invalid to speed future scans.
+ return ldi.insn = nullptr;
+ }
+
+ // Found a Ldi with const and a PLUS insns in the same BB,
+ // and with no interfering insns between them.
+
+ // Emit new Ldi with the sum of the original offsets after the old Ldi.
+ rtx xval = plus_constant (Pmode, ldi.src, INTVAL (add.addend));
+
+ rtx_insn *insn = emit_move_ccc_after (ldi.dest, xval, ldi.insn);
+ avr_dump (";; new Ldi[%d] insn %d after %d: R%d = %r\n\n", ldi.regno,
+ INSN_UID (insn), INSN_UID (ldi.insn), ldi.regno, xval);
+
+ rtx_insn *next = NEXT_INSN (add.insn);
+ ldi.set_deleted ();
+ add.set_deleted ();
+
+ return next;
+}
+
+/* Try to combine two PLUS insns with CONST_INT addend to one such insn.
+ If PREV_ADD is valid, then it precedes ADD in the same basic block.
+ When a replacement is found, a new insn is emitted and the old insns
+ are pseudo-deleted. The returned insn is the point where the calling
+ scanner should continue. When no replacement is found, nullptr is
+ returned and nothing changed. */
+
+rtx_insn *
+avr_pass_fuse_add::fuse_add_add (Add_Insn &prev_add, Add_Insn &add)
+{
+ if (! prev_add.valid ()
+ || reg_seen_between_p (add.dest, prev_add.insn, add.insn))
+ {
+ // If something is between the previous Add and the current insn,
+ // we can set the previous Add invalid to speed future scans.
+ return prev_add.insn = nullptr;
+ }
+
+ // Found two PLUS insns in the same BB, and with no interfering
+ // insns between them.
+ rtx plus = plus_constant (Pmode, add.src, INTVAL (prev_add.addend));
+
+ rtx_insn *next;
+ if (REG_P (plus))
+ {
+ avr_dump (";; Add[%d] from %d annihilates %d\n\n", add.regno,
+ INSN_UID (prev_add.insn), INSN_UID (add.insn));
+ next = NEXT_INSN (add.insn);
+ }
+ else
+ {
+ // Emit after the current insn, so that it will be picked
+ // up as next valid Add insn.
+ next = emit_move_ccc_after (add.dest, plus, add.insn);
+ avr_dump (";; #1 new Add[%d] insn %d after %d: R%d += %d\n\n",
+ add.regno, INSN_UID (next), INSN_UID (add.insn),
+ add.regno, (int) INTVAL (XEXP (plus, 1)));
+ gcc_assert (GET_CODE (plus) == PLUS);
+ }
+
+ add.set_deleted ();
+ prev_add.set_deleted ();
+
+ return next;
+}
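+
+// Hedged illustration (an assumption about how the RTL comes out, not taken
+// from the sources): on Reduced Tiny, two accesses at the same displacement,
+// e.g. with P allocated to Z,
+//
+//     #include <stdint.h>
+//
+//     uint8_t twice (volatile uint8_t *p)
+//     {
+//       return p[2] + p[2];
+//     }
+//
+// are split into  Z += 2; LD; Z -= 2; Z += 2; LD; Z -= 2,  and the inner
+// "Z -= 2; Z += 2" pair has a net addend of zero, so fuse_add_add drops
+// both insns (the "annihilates" case above).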
+
+/* Try to combine a PLUS of the address register with a load or store insn.
+ If ADD is valid, then it precedes MEM in the same basic block.
+ When a replacement is found, a new insn is emitted and the old insns
+ are pseudo-deleted. The returned insn is the point where the calling
+ scanner should continue. When no replacement is found, nullptr is
+ returned and nothing changed. */
+
+rtx_insn *
+avr_pass_fuse_add::fuse_add_mem (Add_Insn &add, Mem_Insn &mem)
+{
+ if (! add.valid ()
+ || reg_seen_between_p (add.dest, add.insn, mem.insn))
+ {
+ // If something is between the Add and the current insn, we can
+ // set the Add invalid to speed future scans.
+ return add.insn = nullptr;
+ }
+
+ AVR_LdSt_Props ap { mem };
+
+ int msize = GET_MODE_SIZE (mem.mode);
+
+ // The mem insn really wants PRE_DEC.
+ bool case1 = ((mem.addr_code == REG || mem.addr_code == POST_INC)
+ && msize > 1 && ap.want_predec && ! ap.has_ldd);
+
+ // The offset can be consumed by a PRE_DEC.
+ bool case2 = (- INTVAL (add.addend) == msize
+ && (mem.addr_code == REG || mem.addr_code == POST_INC)
+ && ap.has_predec && ! ap.want_postinc);
+
+ if (! case1 && ! case2)
+ return nullptr;
+
+ // Change from REG or POST_INC to PRE_DEC.
+ rtx xmem = change_address (mem.mem, mem.mode,
+ gen_rtx_PRE_DEC (Pmode, mem.addr_reg));
+ rtx dest = mem.store_p ? xmem : mem.reg_or_0;
+ rtx src = mem.store_p ? mem.reg_or_0 : xmem;
+
+ rtx_insn *next = emit_move_ccc_after (dest, src, mem.insn);
+ add_reg_note (next, REG_INC, mem.addr_reg);
+ avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", mem.addr_regno,
+ INSN_UID (next), INSN_UID (mem.insn), dest, src);
+
+ // Changing REG or POST_INC -> PRE_DEC means that the addend before
+ // the memory access must be increased by the size of the access.
+ rtx plus = plus_constant (Pmode, add.src, msize);
+ if (! REG_P (plus))
+ {
+ rtx_insn *insn = emit_move_ccc_after (add.dest, plus, add.insn);
+ avr_dump (";; #2 new Add[%d] insn %d after %d: R%d += %d\n\n",
+ add.regno, INSN_UID (insn), INSN_UID (add.insn),
+ add.regno, (int) INTVAL (XEXP (plus, 1)));
+ gcc_assert (GET_CODE (plus) == PLUS);
+ }
+ else
+ avr_dump (";; Add[%d] insn %d consumed into %d\n\n",
+ add.regno, INSN_UID (add.insn), INSN_UID (next));
+
+ // Changing POST_INC -> PRE_DEC means that the addend after the mem has to be
+ // the size of the access. The hope is that this new add insn may be unused.
+ if (mem.addr_code == POST_INC)
+ {
+ plus = plus_constant (Pmode, add.dest, msize);
+ rtx_insn *next2 = emit_move_ccc_after (add.dest, plus, next);
+ avr_dump (";; #3 new Add[%d] insn %d after %d: R%d += %d\n\n", add.regno,
+ INSN_UID (next2), INSN_UID (next), add.regno, msize);
+ next = next2;
+ }
+
+ add.set_deleted ();
+ mem.set_deleted ();
+
+ return next;
+}
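+
+// A minimal sketch of case2 above (assuming the pointer ends up in Z and
+// -mfuse-add is active): decrementing a pointer by the access size right
+// before a load lets the decrement be consumed by PRE_DEC addressing.
+//
+//     #include <stdint.h>
+//
+//     uint8_t before (const uint8_t *p)
+//     {
+//       return p[-1];   // Z += -1; LD r24,Z  becomes  LD r24,-Z.
+//     }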
+
+/* Try to combine a load or store insn with a PLUS of the address register.
+ If MEM is valid, then it precedes ADD in the same basic block.
+ When a replacement is found, a new insn is emitted and the old insns
+ are pseudo-deleted. The returned insn is the point where the calling
+ scanner should continue. When no replacement is found, nullptr is
+ returned and nothing changed. */
+
+rtx_insn *
+avr_pass_fuse_add::fuse_mem_add (Mem_Insn &mem, Add_Insn &add)
+{
+ if (! mem.valid ()
+ || reg_seen_between_p (add.dest, mem.insn, add.insn))
+ {
+ // If something is between the Mem and the current insn, we can
+ // set the Mem invalid to speed future scans.
+ return mem.insn = nullptr;
+ }
+
+ AVR_LdSt_Props ap { mem };
+
+ int msize = GET_MODE_SIZE (mem.mode);
+
+ // The add insn can be consumed by a POST_INC.
+ bool case1 = (mem.addr_code == REG
+ && INTVAL (add.addend) == msize
+ && ap.has_postinc && ! ap.want_predec);
+
+ // There are cases where even a partial consumption of the offset is better.
+ // These are the cases where no LD+offset addressing is available, because
+ // the address register is obviously used after the mem insn, and a mem insn
+ // with REG addressing mode will have to restore the address.
+ bool case2 = (mem.addr_code == REG
+ && msize > 1 && ap.want_postinc && ! ap.has_ldd);
+
+ if (! case1 && ! case2)
+ return nullptr;
+
+ // Change addressing mode from REG to POST_INC.
+ rtx xmem = change_address (mem.mem, mem.mode,
+ gen_rtx_POST_INC (Pmode, mem.addr_reg));
+ rtx dest = mem.store_p ? xmem : mem.reg_or_0;
+ rtx src = mem.store_p ? mem.reg_or_0 : xmem;
+
+ rtx_insn *insn = emit_move_ccc_after (dest, src, mem.insn);
+ add_reg_note (insn, REG_INC, mem.addr_reg);
+ avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", add.regno,
+ INSN_UID (insn), INSN_UID (mem.insn), dest, src);
+
+ rtx_insn *next = NEXT_INSN (add.insn);
+
+ // Changing REG -> POST_INC means that the post addend must be
+ // decreased by the size of the access.
+ rtx plus = plus_constant (Pmode, add.src, -msize);
+ if (! REG_P (plus))
+ {
+ next = emit_move_ccc_after (mem.addr_reg, plus, add.insn);
+ avr_dump (";; #4 new Add[%d] insn %d after %d: R%d += %d\n\n",
+ add.regno, INSN_UID (next), INSN_UID (add.insn),
+ add.regno, (int) INTVAL (XEXP (plus, 1)));
+ gcc_assert (GET_CODE (plus) == PLUS);
+ }
+ else
+ avr_dump (";; Add[%d] insn %d consumed into %d\n\n",
+ add.regno, INSN_UID (add.insn), INSN_UID (insn));
+
+ add.set_deleted ();
+ mem.set_deleted ();
+
+ return next;
+}
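+
+// A minimal sketch of case1 above (assuming p ends up in Z and the add
+// survives until this pass): a load with REG addressing followed by an
+// increment of the access size is turned into a post-increment load.
+//
+//     #include <stdint.h>
+//
+//     uint8_t pop (const uint8_t **pp)
+//     {
+//       const uint8_t *p = *pp;
+//       uint8_t v = *p;   // LD r24,Z ...
+//       *pp = p + 1;      // ... plus Z += 1  becomes  LD r24,Z+.
+//       return v;
+//     }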
+
+/* Try to post-reload combine PLUS with CONST_INT of pointer registers with:
+ - Sets to a constant address.
+ - PLUS insn of that kind.
+ - Indirect loads and stores.
+ In almost all cases, combine opportunities arise from the preparation
+ done by `avr_split_fake_addressing_move', but in some rare cases combinations
+ are found for the ordinary cores, too.
+ As we consider at most one Mem insn per try, there may still be missed
+ optimizations, e.g. POST_INC + PLUS + POST_INC for two adjacent
+ locations might be performed as PRE_DEC + PRE_DEC. */
+
+unsigned int
+avr_pass_fuse_add::execute (function *func)
+{
+ df_note_add_problem ();
+ df_analyze ();
+
+ int n_add = 0, n_mem = 0, n_ldi = 0;
+ basic_block bb;
+
+ FOR_EACH_BB_FN (bb, func)
+ {
+ Ldi_Insn prev_ldi_insns[REG_32];
+ Add_Insn prev_add_insns[REG_32];
+ Mem_Insn prev_mem_insns[REG_32];
+ rtx_insn *insn, *curr;
+
+ avr_dump ("\n;; basic block %d\n\n", bb->index);
+
+ FOR_BB_INSNS_SAFE (bb, insn, curr)
+ {
+ rtx_insn *next = nullptr;
+ Ldi_Insn ldi_insn { insn };
+ Add_Insn add_insn { insn };
+ Mem_Insn mem_insn { insn };
+
+ if (add_insn.valid ())
+ {
+ // Found reg:HI += const_int
+ avr_dump (";; insn %d: Add[%d]: R%d += %d\n\n",
+ INSN_UID (add_insn.insn), add_insn.regno,
+ add_insn.regno, (int) INTVAL (add_insn.addend));
+ Ldi_Insn &prev_ldi_insn = prev_ldi_insns[add_insn.regno];
+ Add_Insn &prev_add_insn = prev_add_insns[add_insn.regno];
+ Mem_Insn &prev_mem_insn = prev_mem_insns[add_insn.regno];
+ if ((next = fuse_ldi_add (prev_ldi_insn, add_insn)))
+ curr = next, n_ldi += 1;
+ else if ((next = fuse_add_add (prev_add_insn, add_insn)))
+ curr = next, n_add += 1;
+ else if ((next = fuse_mem_add (prev_mem_insn, add_insn)))
+ curr = next, n_mem += 1;
+ else
+ prev_add_insn = add_insn;
+ }
+ else if (mem_insn.valid ())
+ {
+ int addr_regno = REGNO (mem_insn.addr_reg);
+ avr_dump (";; insn %d: Mem[%d]: %r = %r\n\n",
+ INSN_UID (mem_insn.insn), addr_regno,
+ mem_insn.dest, mem_insn.src);
+ Add_Insn &prev_add_insn = prev_add_insns[addr_regno];
+ if ((next = fuse_add_mem (prev_add_insn, mem_insn)))
+ curr = next, n_mem += 1;
+ else
+ prev_mem_insns[addr_regno] = mem_insn;
+ }
+ else if (ldi_insn.valid ())
+ {
+ if (! CONST_INT_P (ldi_insn.src))
+ avr_dump (";; insn %d: Ldi[%d]: R%d = %r\n\n",
+ INSN_UID (ldi_insn.insn), ldi_insn.regno,
+ ldi_insn.regno, ldi_insn.src);
+ prev_ldi_insns[ldi_insn.regno] = ldi_insn;
+ }
+ } // for insns
+ } // for BBs
+
+ avr_dump (";; Function %f: Found %d changes: %d ldi, %d add, %d mem.\n",
+ n_ldi + n_add + n_mem, n_ldi, n_add, n_mem);
+
+ return 0;
+}
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Determine whether an ISR may use the __gcc_isr pseudo-instruction.
+
+static const pass_data avr_pass_data_pre_proep =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ 0 // todo_flags_finish
+};
+
+class avr_pass_pre_proep : public rtl_opt_pass
+{
+public:
+ avr_pass_pre_proep (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_pre_proep, ctxt)
+ {
+ this->name = name;
+ }
+
+ void compute_maybe_gasisr (function *);
+
+ unsigned int execute (function *fun) final override
+ {
+ if (avr_gasisr_prologues
+ // Whether this function is an ISR worth scanning at all.
+ && !fun->machine->is_no_gccisr
+ && (fun->machine->is_interrupt
+ || fun->machine->is_signal)
+ && !cfun->machine->is_naked
+ // Paranoia: Non-local gotos and labels that might escape.
+ && !cfun->calls_setjmp
+ && !cfun->has_nonlocal_label
+ && !cfun->has_forced_label_in_static)
+ {
+ compute_maybe_gasisr (fun);
+ }
+
+ return 0;
+ }
+
+}; // avr_pass_pre_proep
+
+
+/* Set fun->machine->gasisr.maybe provided we don't find anything that
+ prohibits GAS generating parts of ISR prologues / epilogues for us. */
+
+void
+avr_pass_pre_proep::compute_maybe_gasisr (function *fun)
+{
+ // Don't use BB iterators so that we see JUMP_TABLE_DATA.
+
+ for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ // Transparent calls always use [R]CALL and are filtered out by GAS.
+ // ISRs don't use -mcall-prologues, hence what remains to be filtered
+ // out are open coded (tail) calls.
+
+ if (CALL_P (insn))
+ return;
+
+ // __tablejump2__ clobbers something and is targeted by JMP so
+ // that GAS won't see its usage.
+
+ if (AVR_HAVE_JMP_CALL
+ && JUMP_TABLE_DATA_P (insn))
+ return;
+
+ // Non-local gotos not seen in *FUN.
+
+ if (JUMP_P (insn)
+ && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
+ return;
+ }
+
+ fun->machine->gasisr.maybe = 1;
+}
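+
+// Illustration (assuming AVR-LibC's <avr/interrupt.h> and a device that
+// provides TIMER0_OVF_vect): an ISR like the one below contains no calls,
+// no jump tables and no non-local gotos, so the scan above sets
+// gasisr.maybe and GAS may emit parts of the prologue / epilogue via the
+// __gcc_isr pseudo-instruction.
+//
+//     #include <stdint.h>
+//     #include <avr/interrupt.h>
+//
+//     volatile uint8_t ticks;
+//
+//     ISR (TIMER0_OVF_vect)
+//     {
+//       ++ticks;
+//     }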
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Late recomputation of notes so we can use `reg_unused_after()' and friends.
+
+static const pass_data avr_pass_data_recompute_notes =
+{
+ RTL_PASS, // type
+ "", // name (will be patched)
+ OPTGROUP_NONE, // optinfo_flags
+ TV_DF_SCAN, // tv_id
+ 0, // properties_required
+ 0, // properties_provided
+ 0, // properties_destroyed
+ 0, // todo_flags_start
+ TODO_df_finish | TODO_df_verify // todo_flags_finish
+};
+
+class avr_pass_recompute_notes : public rtl_opt_pass
+{
+public:
+ avr_pass_recompute_notes (gcc::context *ctxt, const char *name)
+ : rtl_opt_pass (avr_pass_data_recompute_notes, ctxt)
+ {
+ this->name = name;
+ }
+
+ unsigned int execute (function *) final override
+ {
+ df_note_add_problem ();
+ df_analyze ();
+
+ return 0;
+ }
+}; // avr_pass_recompute_notes
+
+} // anonymous namespace
+
+
+
+//////////////////////////////////////////////////////////////////////////////
+// Function visible and used outside this module.
+
+/* During reload, we allow many more addresses than Reduced Tiny actually
+ supports. Split them after reload in order to get closer to the
+ core's capabilities. This sets the stage for pass .avr-fuse-add. */
+
+bool
+avr_split_fake_addressing_move (rtx_insn * /*insn*/, rtx *xop)
+{
+ bool store_p = false;
+ rtx mem, reg_or_0;
+
+ if (REG_P (xop[0]) && MEM_P (xop[1]))
+ {
+ reg_or_0 = xop[0];
+ mem = xop[1];
+ }
+ else if (MEM_P (xop[0])
+ && (REG_P (xop[1])
+ || xop[1] == CONST0_RTX (GET_MODE (xop[0]))))
+ {
+ mem = xop[0];
+ reg_or_0 = xop[1];
+ store_p = true;
+ }
+ else
+ return false;
+
+ machine_mode mode = GET_MODE (mem);
+ rtx base, addr = XEXP (mem, 0);
+ rtx_code addr_code = GET_CODE (addr);
+
+ if (REG_P (reg_or_0)
+ && reg_overlap_mentioned_p (reg_or_0, addr))
+ return false;
+ else if (addr_code == PLUS || addr_code == PRE_DEC || addr_code == POST_INC)
+ base = XEXP (addr, 0);
+ else if (addr_code == REG)
+ base = addr;
+ else
+ return false;
+
+ if (REGNO (base) > REG_Z)
+ return false;
+
+ if (! AVR_TINY
+ // Only keep base registers that can't do PLUS addressing.
+ && ((REGNO (base) != REG_X
+ && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem)))
+ || avr_load_libgcc_p (mem)
+ || avr_mem_memx_p (mem)))
+ return false;
+
+ bool volatile_p = MEM_VOLATILE_P (mem);
+ bool mem_volatile_p = false;
+ if (frame_pointer_needed
+ && REGNO (base) == FRAME_POINTER_REGNUM)
+ {
+ if (avr_fuse_add < 2
+ // Be a projection (we always split PLUS).
+ || (avr_fuse_add == 2 && volatile_p && addr_code != PLUS))
+ return false;
+
+ // Changing the frame pointer locally may confuse later passes
+ // like .dse2 which don't track changes of FP, not even when
+ // respective CFA notes are present. An example is pr22141-1.c.
+ if (avr_fuse_add == 2)
+ mem_volatile_p = true;
+ }
+
+ rtx_code new_code = UNKNOWN;
+ HOST_WIDE_INT add = 0, sub = 0;
+ int msize = GET_MODE_SIZE (mode);
+
+ AVR_LdSt_Props ap { (int) REGNO (base), store_p, volatile_p,
+ ADDR_SPACE_GENERIC };
+
+ switch (addr_code)
+ {
+ default:
+ return false;
+
+ case PLUS:
+ add = INTVAL (XEXP (addr, 1));
+ if (msize == 1)
+ {
+ new_code = REG;
+ sub = -add;
+ }
+ else if (ap.want_predec)
+ {
+ // volatile stores prefer PRE_DEC (MSB first)
+ sub = -add;
+ add += msize;
+ new_code = PRE_DEC;
+ }
+ else
+ {
+ new_code = POST_INC;
+ sub = -add - msize;
+ }
+ break;
+
+ case POST_INC:
+ // volatile stores prefer PRE_DEC (MSB first)
+ if (msize > 1 && ap.want_predec)
+ {
+ add = msize;
+ new_code = PRE_DEC;
+ sub = msize;
+ break;
+ }
+ return false;
+
+ case PRE_DEC:
+ // volatile loads prefer POST_INC (LSB first)
+ if (msize > 1 && ap.want_postinc)
+ {
+ add = -msize;
+ new_code = POST_INC;
+ sub = -msize;
+ break;
+ }
+ return false;
+
+ case REG:
+ if (msize == 1)
+ return false;
+
+ if (ap.want_predec)
+ {
+ add = msize;
+ new_code = PRE_DEC;
+ sub = 0;
+ }
+ else
+ {
+ add = 0;
+ new_code = POST_INC;
+ sub = -msize;
+ }
+ break;
+ } // switch addr_code
+
+ rtx_insn *insn;
+
+ if (add)
+ {
+ insn = emit_move_ccc (base, plus_constant (Pmode, base, add));
+ avr_maybe_adjust_cfa (insn, base, add);
+ }
+
+ rtx new_addr = new_code == REG
+ ? base
+ : gen_rtx_fmt_e (new_code, Pmode, base);
+
+ rtx new_mem = change_address (mem, mode, new_addr);
+ if (mem_volatile_p)
+ MEM_VOLATILE_P (new_mem) = 1;
+
+ insn = emit_move_ccc (store_p ? new_mem : reg_or_0,
+ store_p ? reg_or_0 : new_mem);
+ if (auto_inc_p (new_addr))
+ {
+ add_reg_note (insn, REG_INC, base);
+ int off = new_code == POST_INC ? msize : -msize;
+ avr_maybe_adjust_cfa (insn, base, off);
+ }
+
+ if (sub)
+ {
+ insn = emit_move_ccc (base, plus_constant (Pmode, base, sub));
+ avr_maybe_adjust_cfa (insn, base, sub);
+ }
+
+ return true;
+}
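+
+// A hedged sketch of the Reduced Tiny case (e.g. -mmcu=attiny40, which has
+// no LDD with displacement): a fake address like  Z+2  from
+//
+//     #include <stdint.h>
+//
+//     struct pt { uint8_t x, y, z; };
+//
+//     uint8_t get_z (const struct pt *p)
+//     {
+//       return p->z;
+//     }
+//
+// is split into  Z += 2; LD r24,Z; Z -= 2,  and pass avr-fuse-add then
+// tries to fuse or drop the surrounding adjustments.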
+
+
+
+// Functions make_<pass-name> (gcc::context*) where <pass-name> is
+// according to the pass declaration in avr-passes.def. GCC's pass
+// manager uses these functions to create the respective pass objects.
+
+// Optimize results of the casesi expander for modes < SImode.
+
+rtl_opt_pass *
+make_avr_pass_casesi (gcc::context *ctxt)
+{
+ return new avr_pass_casesi (ctxt, "avr-casesi");
+}
+
+// Try to replace 2 cbranch insns with 1 comparison and 2 branches.
+
+rtl_opt_pass *
+make_avr_pass_ifelse (gcc::context *ctxt)
+{
+ return new avr_pass_ifelse (ctxt, "avr-ifelse");
+}
+
+// Determine whether an ISR may use the __gcc_isr pseudo-instruction.
+
+rtl_opt_pass *
+make_avr_pass_pre_proep (gcc::context *ctxt)
+{
+ return new avr_pass_pre_proep (ctxt, "avr-pre-proep");
+}
+
+// Find more POST_INC and PRE_DEC cases.
+
+rtl_opt_pass *
+make_avr_pass_fuse_add (gcc::context *ctxt)
+{
+ return new avr_pass_fuse_add (ctxt, "avr-fuse-add");
+}
+
+// Late recomputation of notes so we can use `reg_unused_after()' and friends.
+
+rtl_opt_pass *
+make_avr_pass_recompute_notes (gcc::context *ctxt)
+{
+ return new avr_pass_recompute_notes (ctxt, "avr-notes-free-cfg");
+}
diff --git a/gcc/config/avr/avr-passes.def b/gcc/config/avr/avr-passes.def
index 748260e..d39bdd8 100644
--- a/gcc/config/avr/avr-passes.def
+++ b/gcc/config/avr/avr-passes.def
@@ -1,4 +1,4 @@
-/* Description of target passes for AVR.
+/* Description of target passes for AVR 8-bit microcontrollers.
Copyright (C) 2016-2024 Free Software Foundation, Inc. */
/* This file is part of GCC.
@@ -20,12 +20,33 @@
/* A post reload optimization pass that fuses PLUS insns with CONST_INT
addend with a load or store insn to get POST_INC or PRE_DEC addressing.
It can also fuse two PLUSes to a single one, which may occur due to
- splits from `avr_split_tiny_move'. We do this in an own pass because
- it can find more cases than peephole2, for example when there are
- unrelated insns between the interesting ones. */
+ splits from `avr_split_fake_addressing_move'. We do this in a separate
+ pass because it can find more cases than peephole2, for example when
+ there are unrelated insns between the interesting ones. */
INSERT_PASS_BEFORE (pass_peephole2, 1, avr_pass_fuse_add);
+/* There are cases where avr-fuse-add doesn't find POST_INC opportunities because
+ the RTL code at that time is too long-winded, and moves registers back and
+ forth (which seems to be the same reason why pass auto_inc_dec cannot
+ find POST_INC, either). Some of that long-windedness is cleaned up very
+ late in pass cprop_hardreg, which opens up new opportunities to find post
+ increments. An example is the following function from AVR-LibC's qsort:
+
+ void swapfunc (char *a, char *b, int n)
+ {
+ do
+ {
+ char tmp = *a;
+ *a++ = *b;
+ *b++ = tmp;
+ } while (--n > 0);
+ }
+
+ Hence, run avr-fuse-add twice; the second time after cprop_hardreg. */
+
+INSERT_PASS_AFTER (pass_cprop_hardreg, 1, avr_pass_fuse_add);
+
/* An analysis pass that runs prior to prologue / epilogue generation.
Computes cfun->machine->gasisr.maybe which is used in prologue and
epilogue generation provided -mgas-isr-prologues is on. */
@@ -47,9 +68,9 @@ INSERT_PASS_BEFORE (pass_free_cfg, 1, avr_pass_recompute_notes);
tries to fix such situations by operating on the original mode. This
reduces code size and register pressure.
- The assertion is that the code generated by casesi is unaltered and a
+ The assertion is that the code generated by casesi is unaltered and
a sign-extend or zero-extend from QImode or HImode precedes the casesi
- insns withaout any insns in between. */
+ insns without any insns in between. */
INSERT_PASS_AFTER (pass_expand, 1, avr_pass_casesi);
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index 34298b9..96708eb 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -1,5 +1,4 @@
-/* Prototypes for exported functions defined in avr.cc
-
+/* Prototypes for tm_p.h for AVR 8-bit microcontrollers.
Copyright (C) 2000-2024 Free Software Foundation, Inc.
Contributed by Denis Chertykov (chertykov@gmail.com)
@@ -21,7 +20,7 @@
extern int avr_function_arg_regno_p (int r);
-extern void avr_cpu_cpp_builtins (struct cpp_reader * pfile);
+extern void avr_cpu_cpp_builtins (cpp_reader * pfile);
extern enum reg_class avr_regno_reg_class (int r);
extern void asm_globalize_label (FILE *file, const char *name);
extern void avr_adjust_reg_alloc_order (void);
@@ -55,7 +54,7 @@ extern const char *avr_out_tsthi (rtx_insn *, rtx*, int*);
extern const char *avr_out_tstpsi (rtx_insn *, rtx*, int*);
extern const char *avr_out_compare (rtx_insn *, rtx*, int*);
extern const char *avr_out_compare64 (rtx_insn *, rtx*, int*);
-extern const char *ret_cond_branch (rtx x, int len, int reverse);
+extern const char *avr_cond_branch (rtx_insn *, rtx *);
extern const char *avr_out_movpsi (rtx_insn *, rtx*, int*);
extern const char *avr_out_sign_extend (rtx_insn *, rtx*, int*);
extern const char *avr_out_insert_notbit (rtx_insn *, rtx*, int*);
@@ -63,7 +62,11 @@ extern const char *avr_out_insv (rtx_insn *, rtx*, int*);
extern const char *avr_out_extr (rtx_insn *, rtx*, int*);
extern const char *avr_out_extr_not (rtx_insn *, rtx*, int*);
extern const char *avr_out_plus_set_ZN (rtx*, int*);
-extern const char *avr_out_cmp_ext (rtx*, enum rtx_code, int*);
+extern const char *avr_out_plus_set_N (rtx*, int*);
+extern const char *avr_out_op8_set_ZN (rtx_code, rtx*, int*);
+extern int avr_len_op8_set_ZN (rtx_code, rtx*);
+extern bool avr_op8_ZN_operator (rtx);
+extern const char *avr_out_cmp_ext (rtx*, rtx_code, int*);
extern const char *ashlqi3_out (rtx_insn *insn, rtx operands[], int *len);
extern const char *ashlhi3_out (rtx_insn *insn, rtx operands[], int *len);
@@ -91,7 +94,6 @@ extern void avr_expand_epilogue (bool);
extern bool avr_emit_cpymemhi (rtx*);
extern void avr_emit_xior_with_shift (rtx_insn*, rtx*, int);
extern int avr_epilogue_uses (int regno);
-extern bool avr_split_tiny_move (rtx_insn *insn, rtx *operands);
extern void avr_output_addr_vec (rtx_insn*, rtx);
extern const char *avr_out_sbxx_branch (rtx_insn *insn, rtx operands[]);
@@ -113,7 +115,8 @@ extern const char* output_reload_inhi (rtx*, rtx, int*);
extern const char* output_reload_insisf (rtx*, rtx, int*);
extern const char* avr_out_reload_inpsi (rtx*, rtx, int*);
extern const char* avr_out_lpm (rtx_insn *, rtx*, int*);
-extern void avr_notice_update_cc (rtx body, rtx_insn *insn);
+extern const char* avr_out_cmp_lsr (rtx_insn *, rtx*, int*);
+extern void avr_maybe_cmp_lsr (rtx *);
extern int reg_unused_after (rtx_insn *insn, rtx reg);
extern int avr_jump_mode (rtx x, rtx_insn *insn, int = 0);
extern int test_hard_reg_class (enum reg_class rclass, rtx x);
@@ -121,11 +124,11 @@ extern int jump_over_one_insn_p (rtx_insn *insn, rtx dest);
extern void avr_final_prescan_insn (rtx_insn *insn, rtx *operand,
int num_operands);
-extern RTX_CODE avr_normalize_condition (RTX_CODE condition);
+extern rtx_code avr_normalize_condition (rtx_code condition);
extern void out_shift_with_cnt (const char *templ, rtx_insn *insn,
rtx operands[], int *len, int t_len);
-extern enum reg_class avr_mode_code_base_reg_class (machine_mode, addr_space_t, RTX_CODE, RTX_CODE);
-extern bool avr_regno_mode_code_ok_for_base_p (int, machine_mode, addr_space_t, RTX_CODE, RTX_CODE);
+extern enum reg_class avr_mode_code_base_reg_class (machine_mode, addr_space_t, rtx_code, rtx_code);
+extern bool avr_regno_mode_code_ok_for_base_p (int, machine_mode, addr_space_t, rtx_code, rtx_code);
extern rtx avr_incoming_return_addr_rtx (void);
extern rtx avr_legitimize_reload_address (rtx*, machine_mode, int, int, int, int, rtx (*)(rtx,int));
extern bool avr_adiw_reg_p (rtx);
@@ -134,9 +137,8 @@ extern bool avr_mem_memx_p (rtx);
extern bool avr_load_libgcc_p (rtx);
extern bool avr_xload_libgcc_p (machine_mode);
extern rtx avr_eval_addr_attrib (rtx x);
-extern bool avr_casei_sequence_check_operands (rtx *xop);
-extern bool avr_float_lib_compare_returns_bool (machine_mode, enum rtx_code);
+extern bool avr_float_lib_compare_returns_bool (machine_mode, rtx_code);
static inline unsigned
regmask (machine_mode mode, unsigned regno)
@@ -154,6 +156,8 @@ extern rtx zero_reg_rtx;
extern rtx all_regs_rtx[32];
extern rtx rampz_rtx;
extern rtx cc_reg_rtx;
+extern rtx ccn_reg_rtx;
+extern rtx cczn_reg_rtx;
#endif /* RTX_CODE */
@@ -163,6 +167,8 @@ extern void asm_output_float (FILE *file, REAL_VALUE_TYPE n);
extern bool avr_have_dimode;
+/* From avr-passes.cc */
+
namespace gcc { class context; }
class rtl_opt_pass;
@@ -171,6 +177,10 @@ extern rtl_opt_pass *make_avr_pass_pre_proep (gcc::context *);
extern rtl_opt_pass *make_avr_pass_recompute_notes (gcc::context *);
extern rtl_opt_pass *make_avr_pass_casesi (gcc::context *);
extern rtl_opt_pass *make_avr_pass_ifelse (gcc::context *);
+#ifdef RTX_CODE
+extern bool avr_casei_sequence_check_operands (rtx *xop);
+extern bool avr_split_fake_addressing_move (rtx_insn *insn, rtx *operands);
+#endif /* RTX_CODE */
/* From avr-log.cc */
diff --git a/gcc/config/avr/avr.cc b/gcc/config/avr/avr.cc
index c520b98..f62ea8a 100644
--- a/gcc/config/avr/avr.cc
+++ b/gcc/config/avr/avr.cc
@@ -1,4 +1,4 @@
-/* Subroutines for insn-output.cc for ATMEL AVR micro controllers
+/* Subroutines for insn-output.cc for AVR 8-bit microcontrollers
Copyright (C) 1998-2024 Free Software Foundation, Inc.
Contributed by Denis Chertykov (chertykov@gmail.com)
@@ -8,12 +8,12 @@
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
-
+
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
@@ -50,11 +50,10 @@
#include "explow.h"
#include "expr.h"
#include "langhooks.h"
-#include "cfgrtl.h"
#include "builtins.h"
-#include "context.h"
#include "tree-pass.h"
-#include "print-rtl.h"
+#include "context.h"
+#include "pass_manager.h"
#include "rtl-iter.h"
/* This file should be included last. */
@@ -154,17 +153,6 @@ static const char *out_movqi_mr_r (rtx_insn *, rtx[], int *);
static const char *out_movhi_mr_r (rtx_insn *, rtx[], int *);
static const char *out_movsi_mr_r (rtx_insn *, rtx[], int *);
-static int get_sequence_length (rtx_insn *insns);
-static int sequent_regs_live (void);
-static const char *ptrreg_to_str (int);
-static const char *cond_string (enum rtx_code);
-static int avr_num_arg_regs (machine_mode, const_tree);
-static int avr_operand_rtx_cost (rtx, machine_mode, enum rtx_code,
- int, bool);
-static void output_reload_in_const (rtx *, rtx, int *, bool);
-static struct machine_function *avr_init_machine_status (void);
-static bool _reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn);
-
/* Prototypes for hook implementors if needed before their implementation. */
@@ -196,7 +184,11 @@ rtx zero_reg_rtx;
/* Condition Code register RTX (reg:CC REG_CC) */
extern GTY(()) rtx cc_reg_rtx;
+extern GTY(()) rtx ccn_reg_rtx;
+extern GTY(()) rtx cczn_reg_rtx;
rtx cc_reg_rtx;
+rtx ccn_reg_rtx;
+rtx cczn_reg_rtx;
/* RTXs for all general purpose registers as QImode */
extern GTY(()) rtx all_regs_rtx[REG_32];
@@ -222,7 +214,7 @@ static GTY(()) rtx xstring_e;
/* Current architecture. */
const avr_arch_t *avr_arch;
-enum avr_arch_id avr_arch_index;
+avr_arch_id avr_arch_index;
/* Unnamed sections associated to __attribute__((progmem)) aka. PROGMEM
or to address space __flash* or __memx. Only used as singletons inside
@@ -255,820 +247,144 @@ avr_tolower (char *lo, const char *up)
}
-/* Constraint helper function. XVAL is a CONST_INT or a CONST_DOUBLE.
- Return true if the least significant N_BYTES bytes of XVAL all have a
- popcount in POP_MASK and false, otherwise. POP_MASK represents a subset
- of integers which contains an integer N iff bit N of POP_MASK is set. */
+/* Return chunk of mode MODE of X as an rtx. N specifies the subreg
+ byte at which the chunk starts. N must be an integral multiple
+ of the mode size. */
-bool
-avr_popcount_each_byte (rtx xval, int n_bytes, int pop_mask)
+static rtx
+avr_chunk (machine_mode mode, rtx x, int n)
{
- machine_mode mode = GET_MODE (xval);
-
- if (VOIDmode == mode)
- mode = SImode;
-
- for (int i = 0; i < n_bytes; i++)
- {
- rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i);
- unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode);
-
- if ((pop_mask & (1 << popcount_hwi (val8))) == 0)
- return false;
- }
-
- return true;
+ gcc_assert (n % GET_MODE_SIZE (mode) == 0);
+ machine_mode xmode = GET_MODE (x) == VOIDmode ? DImode : GET_MODE (x);
+ return simplify_gen_subreg (mode, x, xmode, n);
}
-/* Constraint helper function. XVAL is a CONST_INT. Return true if we
- can perform XOR without a clobber reg, provided the operation is on
- a d-register. This means each byte is in { 0, 0xff, 0x80 }. */
+/* Return the N-th byte of X as an rtx. */
-bool
-avr_xor_noclobber_dconst (rtx xval, int n_bytes)
+static rtx
+avr_byte (rtx x, int n)
{
- machine_mode mode = GET_MODE (xval);
-
- if (VOIDmode == mode)
- mode = SImode;
-
- for (int i = 0; i < n_bytes; ++i)
- {
- rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i);
- unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode);
-
- if (val8 != 0 && val8 != 0xff && val8 != 0x80)
- return false;
- }
-
- return true;
+ return avr_chunk (QImode, x, n);
}
-/* Access some RTX as INT_MODE. If X is a CONST_FIXED we can get
- the bit representation of X by "casting" it to CONST_INT. */
+/* Return the sub-word of X starting at byte number N. */
-rtx
-avr_to_int_mode (rtx x)
+static rtx
+avr_word (rtx x, int n)
{
- machine_mode mode = GET_MODE (x);
-
- return VOIDmode == mode
- ? x
- : simplify_gen_subreg (int_mode_for_mode (mode).require (), x, mode, 0);
+ return avr_chunk (HImode, x, n);
}
-/* Return true if hard register REG supports the ADIW and SBIW instructions. */
+/* Return the N-th byte of compile-time constant X as an int8_t. */
-bool
-avr_adiw_reg_p (rtx reg)
+static int8_t
+avr_int8 (rtx x, int n)
{
- return (AVR_HAVE_ADIW
- && test_hard_reg_class (ADDW_REGS, reg));
-}
+ gcc_assert (CONST_INT_P (x) || CONST_FIXED_P (x) || CONST_DOUBLE_P (x));
-
-static bool
-ra_in_progress ()
-{
- return avr_lra_p ? lra_in_progress : reload_in_progress;
+ return (int8_t) trunc_int_for_mode (INTVAL (avr_byte (x, n)), QImode);
}
+/* Return the N-th byte of compile-time constant X as a uint8_t. */
-namespace {
-
-static const pass_data avr_pass_data_recompute_notes =
+static uint8_t
+avr_uint8 (rtx x, int n)
{
- RTL_PASS, // type
- "", // name (will be patched)
- OPTGROUP_NONE, // optinfo_flags
- TV_DF_SCAN, // tv_id
- 0, // properties_required
- 0, // properties_provided
- 0, // properties_destroyed
- 0, // todo_flags_start
- TODO_df_finish | TODO_df_verify // todo_flags_finish
-};
-
-
-class avr_pass_recompute_notes : public rtl_opt_pass
-{
-public:
- avr_pass_recompute_notes (gcc::context *ctxt, const char *name)
- : rtl_opt_pass (avr_pass_data_recompute_notes, ctxt)
- {
- this->name = name;
- }
-
- virtual unsigned int execute (function *)
- {
- df_note_add_problem ();
- df_analyze ();
-
- return 0;
- }
-}; // avr_pass_recompute_notes
-
-static const pass_data avr_pass_data_casesi =
-{
- RTL_PASS, // type
- "", // name (will be patched)
- OPTGROUP_NONE, // optinfo_flags
- TV_DF_SCAN, // tv_id
- 0, // properties_required
- 0, // properties_provided
- 0, // properties_destroyed
- 0, // todo_flags_start
- 0 // todo_flags_finish
-};
-
-
-class avr_pass_casesi : public rtl_opt_pass
-{
-public:
- avr_pass_casesi (gcc::context *ctxt, const char *name)
- : rtl_opt_pass (avr_pass_data_casesi, ctxt)
- {
- this->name = name;
- }
-
- void avr_rest_of_handle_casesi (function *);
-
- virtual bool gate (function *) { return optimize > 0; }
-
- virtual unsigned int execute (function *func)
- {
- avr_rest_of_handle_casesi (func);
-
- return 0;
- }
-}; // avr_pass_casesi
-
-
-static const pass_data avr_pass_data_ifelse =
-{
- RTL_PASS, // type
- "", // name (will be patched)
- OPTGROUP_NONE, // optinfo_flags
- TV_DF_SCAN, // tv_id
- 0, // properties_required
- 0, // properties_provided
- 0, // properties_destroyed
- 0, // todo_flags_start
- TODO_df_finish | TODO_df_verify // todo_flags_finish
-};
-
-class avr_pass_ifelse : public rtl_opt_pass
-{
-public:
- avr_pass_ifelse (gcc::context *ctxt, const char *name)
- : rtl_opt_pass (avr_pass_data_ifelse, ctxt)
- {
- this->name = name;
- }
-
- void avr_rest_of_handle_ifelse (function *);
-
- virtual bool gate (function *) { return optimize > 0; }
-
- virtual unsigned int execute (function *func)
- {
- avr_rest_of_handle_ifelse (func);
+ return (uint8_t) avr_int8 (x, n);
+}
- return 0;
- }
-}; // avr_pass_ifelse
-} // anon namespace
+/* Return the sub-word of compile-time constant X that starts
+ at byte N as an int16_t. */
-rtl_opt_pass *
-make_avr_pass_recompute_notes (gcc::context *ctxt)
+static int16_t
+avr_int16 (rtx x, int n)
{
- return new avr_pass_recompute_notes (ctxt, "avr-notes-free-cfg");
-}
+ gcc_assert (CONST_INT_P (x) || CONST_FIXED_P (x) || CONST_DOUBLE_P (x));
-rtl_opt_pass *
-make_avr_pass_casesi (gcc::context *ctxt)
-{
- return new avr_pass_casesi (ctxt, "avr-casesi");
+ return (int16_t) trunc_int_for_mode (INTVAL (avr_word (x, n)), HImode);
}
-rtl_opt_pass *
-make_avr_pass_ifelse (gcc::context *ctxt)
-{
- return new avr_pass_ifelse (ctxt, "avr-ifelse");
-}
+/* Return the sub-word of compile-time constant X that starts
+ at byte N as a uint16_t. */
-
-/* Make one parallel insn with all the patterns from insns i[0]..i[5]. */
-
-static rtx_insn *
-avr_parallel_insn_from_insns (rtx_insn *i[5])
+static uint16_t
+avr_uint16 (rtx x, int n)
{
- rtvec vec = gen_rtvec (5, PATTERN (i[0]), PATTERN (i[1]), PATTERN (i[2]),
- PATTERN (i[3]), PATTERN (i[4]));
- start_sequence();
- emit (gen_rtx_PARALLEL (VOIDmode, vec));
- rtx_insn *insn = get_insns();
- end_sequence();
-
- return insn;
+ return (uint16_t) avr_int16 (x, n);
}
-/* Return true if we see an insn stream generated by casesi expander together
- with an extension to SImode of the switch value.
-
- If this is the case, fill in the insns from casesi to INSNS[1..5] and
- the SImode extension to INSNS[0]. Moreover, extract the operands of
- pattern casesi_<mode>_sequence forged from the sequence to recog_data. */
+/* Constraint helper function. XVAL is a CONST_INT or a CONST_DOUBLE.
+ Return true if the least significant N_BYTES bytes of XVAL all have a
+ popcount in POP_MASK and false, otherwise. POP_MASK represents a subset
+ of integers which contains an integer N iff bit N of POP_MASK is set. */
-static bool
-avr_is_casesi_sequence (basic_block bb, rtx_insn *insn, rtx_insn *insns[5])
+bool
+avr_popcount_each_byte (rtx xval, int n_bytes, int pop_mask)
{
- rtx set_4, set_0;
-
- /* A first and quick test for a casesi sequences. As a side effect of
- the test, harvest respective insns to INSNS[0..4]. */
-
- if (!(JUMP_P (insns[4] = insn)
- // casesi is the only insn that comes up with UNSPEC_INDEX_JMP,
- // hence the following test ensures that we are actually dealing
- // with code from casesi.
- && (set_4 = single_set (insns[4]))
- && UNSPEC == GET_CODE (SET_SRC (set_4))
- && UNSPEC_INDEX_JMP == XINT (SET_SRC (set_4), 1)
-
- && (insns[3] = prev_real_insn (insns[4]))
- && (insns[2] = prev_real_insn (insns[3]))
- && (insns[1] = prev_real_insn (insns[2]))
-
- // Insn prior to casesi.
- && (insns[0] = prev_real_insn (insns[1]))
- && (set_0 = single_set (insns[0]))
- && extend_operator (SET_SRC (set_0), SImode)))
- {
- return false;
- }
-
- if (dump_file)
- {
- fprintf (dump_file, ";; Sequence from casesi in "
- "[bb %d]:\n\n", bb->index);
- for (int i = 0; i < 5; i++)
- print_rtl_single (dump_file, insns[i]);
- }
-
- /* We have to deal with quite some operands. Extracting them by hand
- would be tedious, therefore wrap the insn patterns into a parallel,
- run recog against it and then use insn extract to get the operands. */
-
- rtx_insn *xinsn = avr_parallel_insn_from_insns (insns);
-
- INSN_CODE (xinsn) = recog (PATTERN (xinsn), xinsn, NULL /* num_clobbers */);
-
- /* Failing to recognize means that someone changed the casesi expander or
- that some passes prior to this one performed some unexpected changes.
- Gracefully drop such situations instead of aborting. */
-
- if (INSN_CODE (xinsn) < 0)
+ for (int i = 0; i < n_bytes; i++)
{
- if (dump_file)
- fprintf (dump_file, ";; Sequence not recognized, giving up.\n\n");
-
- return false;
- }
-
- gcc_assert (CODE_FOR_casesi_qi_sequence == INSN_CODE (xinsn)
- || CODE_FOR_casesi_hi_sequence == INSN_CODE (xinsn));
-
- extract_insn (xinsn);
+ unsigned int val8 = avr_uint8 (xval, i);
- // Assert on the anatomy of xinsn's operands we are going to work with.
-
- gcc_assert (recog_data.n_operands == 11);
- gcc_assert (recog_data.n_dups == 4);
-
- if (dump_file)
- {
- fprintf (dump_file, ";; Operands extracted:\n");
- for (int i = 0; i < recog_data.n_operands; i++)
- avr_fdump (dump_file, ";; $%d = %r\n", i, recog_data.operand[i]);
- fprintf (dump_file, "\n");
+ if ((pop_mask & (1 << popcount_hwi (val8))) == 0)
+ return false;
}
return true;
}
-/* Perform some extra checks on operands of casesi_<mode>_sequence.
- Not all operand dependencies can be described by means of predicates.
- This function performs left over checks and should always return true.
- Returning false means that someone changed the casesi expander but did
- not adjust casesi_<mode>_sequence. */
+/* Constraint helper function. XVAL is a CONST_INT. Return true if we
+ can perform XOR without a clobber reg, provided the operation is on
+ a d-register. This means each byte is in { 0, 0xff, 0x80 }. */
bool
-avr_casei_sequence_check_operands (rtx *xop)
+avr_xor_noclobber_dconst (rtx xval, int n_bytes)
{
- rtx sub_5 = NULL_RTX;
-
- if (AVR_HAVE_EIJMP_EICALL
- // The last clobber op of the tablejump.
- && xop[8] == all_regs_rtx[REG_24])
+ for (int i = 0; i < n_bytes; ++i)
{
- // $6 is: (subreg:SI ($5) 0)
- sub_5 = xop[6];
- }
+ unsigned int val8 = avr_uint8 (xval, i);
- if (!AVR_HAVE_EIJMP_EICALL
- // $6 is: (plus:HI (subreg:SI ($5) 0)
- // (label_ref ($3)))
- && PLUS == GET_CODE (xop[6])
- && LABEL_REF == GET_CODE (XEXP (xop[6], 1))
- && rtx_equal_p (xop[3], XEXP (XEXP (xop[6], 1), 0))
- // The last clobber op of the tablejump.
- && xop[8] == const0_rtx)
- {
- sub_5 = XEXP (xop[6], 0);
+ if (val8 != 0 && val8 != 0xff && val8 != 0x80)
+ return false;
}
- if (sub_5
- && SUBREG_P (sub_5)
- && SUBREG_BYTE (sub_5) == 0
- && rtx_equal_p (xop[5], SUBREG_REG (sub_5)))
- return true;
-
- if (dump_file)
- fprintf (dump_file, "\n;; Failed condition for casesi_<mode>_sequence\n\n");
-
- return false;
+ return true;
}
-/* INSNS[1..4] is a sequence as generated by casesi and INSNS[0] is an
- extension of an 8-bit or 16-bit integer to SImode. XOP contains the
- operands of INSNS as extracted by insn_extract from pattern
- casesi_<mode>_sequence:
-
- $0: SImode reg switch value as result of $9.
- $1: Negative of smallest index in switch.
- $2: Number of entries in switch.
- $3: Label to table.
- $4: Label if out-of-bounds.
- $5: $0 + $1.
- $6: 3-byte PC: subreg:HI ($5) + label_ref ($3)
- 2-byte PC: subreg:HI ($5)
- $7: HI reg index into table (Z or pseudo)
- $8: R24 or const0_rtx (to be clobbered)
- $9: Extension to SImode of an 8-bit or 16-bit integer register $10.
- $10: QImode or HImode register input of $9.
-
- Try to optimize this sequence, i.e. use the original HImode / QImode
- switch value instead of SImode. */
-
-static void
-avr_optimize_casesi (rtx_insn *insns[5], rtx *xop)
-{
- // Original mode of the switch value; this is QImode or HImode.
- machine_mode mode = GET_MODE (xop[10]);
-
- // How the original switch value was extended to SImode; this is
- // SIGN_EXTEND or ZERO_EXTEND.
- enum rtx_code code = GET_CODE (xop[9]);
-
- // Lower index, upper index (plus one) and range of case calues.
- HOST_WIDE_INT low_idx = -INTVAL (xop[1]);
- HOST_WIDE_INT num_idx = INTVAL (xop[2]);
- HOST_WIDE_INT hig_idx = low_idx + num_idx;
-
- // Maximum ranges of (un)signed QImode resp. HImode.
- unsigned umax = QImode == mode ? 0xff : 0xffff;
- int imax = QImode == mode ? 0x7f : 0x7fff;
- int imin = -imax - 1;
-
- // Testing the case range and whether it fits into the range of the
- // (un)signed mode. This test should actually always pass because it
- // makes no sense to have case values outside the mode range. Notice
- // that case labels which are unreachable because they are outside the
- // mode of the switch value (e.g. "case -1" for uint8_t) have already
- // been thrown away by the middle-end.
-
- if (SIGN_EXTEND == code
- && low_idx >= imin
- && hig_idx <= imax)
- {
- // ok
- }
- else if (ZERO_EXTEND == code
- && low_idx >= 0
- && (unsigned) hig_idx <= umax)
- {
- // ok
- }
- else
- {
- if (dump_file)
- fprintf (dump_file, ";; Case ranges too big, giving up.\n\n");
- return;
- }
-
- // Do normalization of switch value $10 and out-of-bound check in its
- // original mode instead of in SImode. Use a newly created pseudo.
- // This will replace insns[1..2].
-
- start_sequence();
-
- rtx reg = copy_to_mode_reg (mode, xop[10]);
-
- rtx (*gen_add)(rtx,rtx,rtx) = QImode == mode ? gen_addqi3 : gen_addhi3;
- rtx (*gen_cbranch)(rtx,rtx,rtx,rtx)
- = QImode == mode ? gen_cbranchqi4 : gen_cbranchhi4;
-
- emit_insn (gen_add (reg, reg, gen_int_mode (-low_idx, mode)));
- rtx op0 = reg; rtx op1 = gen_int_mode (num_idx, mode);
- rtx labelref = copy_rtx (xop[4]);
- rtx xbranch = gen_cbranch (gen_rtx_fmt_ee (GTU, VOIDmode, op0, op1),
- op0, op1, labelref);
- rtx_insn *cbranch = emit_jump_insn (xbranch);
- JUMP_LABEL (cbranch) = xop[4];
- ++LABEL_NUSES (xop[4]);
-
- rtx_insn *seq1 = get_insns();
- rtx_insn *last1 = get_last_insn();
- end_sequence();
-
- emit_insn_after (seq1, insns[2]);
-
- // After the out-of-bounds test and corresponding branch, use a
- // 16-bit index. If QImode is used, extend it to HImode first.
- // This will replace insns[4].
-
- start_sequence();
-
- if (QImode == mode)
- reg = force_reg (HImode, gen_rtx_fmt_e (code, HImode, reg));
-
- rtx pat_4 = AVR_3_BYTE_PC
- ? gen_movhi (xop[7], reg)
- : gen_addhi3 (xop[7], reg, gen_rtx_LABEL_REF (VOIDmode, xop[3]));
-
- emit_insn (pat_4);
-
- rtx_insn *seq2 = get_insns();
- rtx_insn *last2 = get_last_insn();
- end_sequence();
-
- emit_insn_after (seq2, insns[3]);
-
- if (dump_file)
- {
- fprintf (dump_file, ";; New insns: ");
-
- for (rtx_insn *insn = seq1; ; insn = NEXT_INSN (insn))
- {
- fprintf (dump_file, "%d, ", INSN_UID (insn));
- if (insn == last1)
- break;
- }
- for (rtx_insn *insn = seq2; ; insn = NEXT_INSN (insn))
- {
- fprintf (dump_file, "%d%s", INSN_UID (insn),
- insn == last2 ? ".\n\n" : ", ");
- if (insn == last2)
- break;
- }
-
- fprintf (dump_file, ";; Deleting insns: %d, %d, %d.\n\n",
- INSN_UID (insns[1]), INSN_UID (insns[2]), INSN_UID (insns[3]));
- }
-
- // Pseudodelete the SImode and subreg of SImode insns. We don't care
- // about the extension insns[0]: Its result is now unused and other
- // passes will clean it up.
-
- SET_INSN_DELETED (insns[1]);
- SET_INSN_DELETED (insns[2]);
- SET_INSN_DELETED (insns[3]);
-}
-
+/* Access some RTX as INT_MODE. If X is a CONST_FIXED we can get
+ the bit representation of X by "casting" it to CONST_INT. */
-void
-avr_pass_casesi::avr_rest_of_handle_casesi (function *func)
+rtx
+avr_to_int_mode (rtx x)
{
- basic_block bb;
-
- FOR_EACH_BB_FN (bb, func)
- {
- rtx_insn *insn, *insns[5];
+ machine_mode mode = GET_MODE (x);
- FOR_BB_INSNS (bb, insn)
- {
- if (avr_is_casesi_sequence (bb, insn, insns))
- {
- avr_optimize_casesi (insns, recog_data.operand);
- }
- }
- }
+ return VOIDmode == mode
+ ? x
+ : simplify_gen_subreg (int_mode_for_mode (mode).require (), x, mode, 0);
}
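
The "bit cast" above can be pictured outside of GCC: a fixed-point value and its integer view share the same bits, so byte-wise routines can operate on the integer representation. A minimal stand-alone sketch (plain C++, not GCC API; the Q8.8 layout is only an example):

    // 1.5 in Q8.8 fixed point is the bit pattern 0x0180; viewing those
    // bits as a plain integer is what avr_to_int_mode does for RTL.
    #include <cstdint>
    #include <cstdio>

    int main ()
    {
      double value = 1.5;
      int16_t q8_8 = (int16_t) (value * 256.0);   // the "CONST_FIXED"
      uint16_t bits = (uint16_t) q8_8;            // its integer-mode view
      printf ("Q8.8 bits of %.2f: 0x%04x\n", value, bits);
      return 0;
    }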
-/* A helper for the next method. Suppose we have two conditional branches
-
- if (reg <cond1> xval1) goto label1;
- if (reg <cond2> xval2) goto label2;
-
- If the second comparison is redundant and there is a code <cond> such
- that the sequence can be performed as
-
- REG_CC = compare (reg, xval1);
- if (REG_CC <cond1> 0) goto label1;
- if (REG_CC <cond> 0) goto label2;
-
- then return <cond>. Otherwise, return UNKNOWN.
- xval1 and xval2 are CONST_INT, and mode is the scalar int mode in which
- the comparison will be carried out. reverse_cond1 can be set to reverse
- condition cond1. This is useful if the second comparison does not follow
- the first one, but is located after label1 like in:
-
- if (reg <cond1> xval1) goto label1;
- ...
- label1:
- if (reg <cond2> xval2) goto label2; */
-
-static enum rtx_code
-avr_redundant_compare (enum rtx_code cond1, rtx xval1,
- enum rtx_code cond2, rtx xval2,
- machine_mode mode, bool reverse_cond1)
-{
- HOST_WIDE_INT ival1 = INTVAL (xval1);
- HOST_WIDE_INT ival2 = INTVAL (xval2);
-
- unsigned HOST_WIDE_INT mask = GET_MODE_MASK (mode);
- unsigned HOST_WIDE_INT uval1 = mask & UINTVAL (xval1);
- unsigned HOST_WIDE_INT uval2 = mask & UINTVAL (xval2);
-
- if (reverse_cond1)
- cond1 = reverse_condition (cond1);
-
- if (cond1 == EQ)
- {
- ////////////////////////////////////////////////
- // A sequence like
- // if (reg == val) goto label1;
- // if (reg > val) goto label2;
- // can be re-written using the same, simple comparison like in:
- // REG_CC = compare (reg, val)
- // if (REG_CC == 0) goto label1;
- // if (REG_CC >= 0) goto label2;
- if (ival1 == ival2
- && (cond2 == GT || cond2 == GTU))
- return avr_normalize_condition (cond2);
-
- // Similar, but the input sequence is like
- // if (reg == val) goto label1;
- // if (reg >= val) goto label2;
- if (ival1 == ival2
- && (cond2 == GE || cond2 == GEU))
- return cond2;
-
- // Similar, but the input sequence is like
- // if (reg == val) goto label1;
- // if (reg >= val + 1) goto label2;
- if ((cond2 == GE && ival2 == 1 + ival1)
- || (cond2 == GEU && uval2 == 1 + uval1))
- return cond2;
-
- // Similar, but the input sequence is like
- // if (reg == val) goto label1;
- // if (reg > val - 1) goto label2;
- if ((cond2 == GT && ival2 == ival1 - 1)
- || (cond2 == GTU && uval2 == uval1 - 1))
- return avr_normalize_condition (cond2);
-
- /////////////////////////////////////////////////////////
- // A sequence like
- // if (reg == val) goto label1;
- // if (reg < 1 + val) goto label2;
- // can be re-written as
- // REG_CC = compare (reg, val)
- // if (REG_CC == 0) goto label1;
- // if (REG_CC < 0) goto label2;
- if ((cond2 == LT && ival2 == 1 + ival1)
- || (cond2 == LTU && uval2 == 1 + uval1))
- return cond2;
-
- // Similar, but with an input sequence like
- // if (reg == val) goto label1;
- // if (reg <= val) goto label2;
- if (ival1 == ival2
- && (cond2 == LE || cond2 == LEU))
- return avr_normalize_condition (cond2);
-
- // Similar, but with an input sequence like
- // if (reg == val) goto label1;
- // if (reg < val) goto label2;
- if (ival1 == ival2
- && (cond2 == LT || cond2 == LTU))
- return cond2;
-
- // Similar, but with an input sequence like
- // if (reg == val) goto label1;
- // if (reg <= val - 1) goto label2;
- if ((cond2 == LE && ival2 == ival1 - 1)
- || (cond2 == LEU && uval2 == uval1 - 1))
- return avr_normalize_condition (cond2);
-
- } // cond1 == EQ
+/* Return true if hard register REG supports the ADIW and SBIW instructions. */
- return UNKNOWN;
+bool
+avr_adiw_reg_p (rtx reg)
+{
+ return (AVR_HAVE_ADIW
+ && test_hard_reg_class (ADDW_REGS, reg));
}
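
ADIW/SBIW only operate on the word pairs r25:r24, r27:r26 (X), r29:r28 (Y) and r31:r30 (Z). A rough stand-alone predicate on the pair's low hard register number might look like the sketch below (illustrative only; the real test goes through ADDW_REGS):

    // True for the low register of a pair that ADIW/SBIW can address.
    static bool adiw_pair_regno_p (int regno)
    {
      return regno == 24 || regno == 26 || regno == 28 || regno == 30;
    }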
-/* If-else decision trees generated for switch / case may produce sequences
- like
-
- SREG = compare (reg, val);
- if (SREG == 0) goto label1;
- SREG = compare (reg, 1 + val);
- if (SREG >= 0) goto label2;
-
- which can be optimized to
-
- SREG = compare (reg, val);
- if (SREG == 0) goto label1;
- if (SREG >= 0) goto label2;
-
- The optimal place for such a pass would be directly after expand, but
- it's not possible for a jump insn to target more than one code label.
- Hence, run a mini pass right before split2 which introduces REG_CC. */
-
-void
-avr_pass_ifelse::avr_rest_of_handle_ifelse (function *)
+static bool
+ra_in_progress ()
{
- rtx_insn *next_insn;
-
- for (rtx_insn *insn = get_insns(); insn; insn = next_insn)
- {
- next_insn = next_nonnote_nondebug_insn (insn);
-
- if (! next_insn)
- break;
-
- // Search for two cbranch insns. The first one is a cbranch.
- // Filter for "cbranch<mode>4_insn" with mode in QI, HI, PSI, SI.
-
- if (! JUMP_P (insn))
- continue;
-
- int icode1 = recog_memoized (insn);
-
- if (icode1 != CODE_FOR_cbranchqi4_insn
- && icode1 != CODE_FOR_cbranchhi4_insn
- && icode1 != CODE_FOR_cbranchpsi4_insn
- && icode1 != CODE_FOR_cbranchsi4_insn)
- continue;
-
- rtx_jump_insn *insn1 = as_a<rtx_jump_insn *> (insn);
- rtx_jump_insn *insn2 = nullptr;
- bool follow_label1 = false;
-
- // Extract the operands of the first insn:
- // $0 = comparison operator ($1, $2)
- // $1 = reg
- // $2 = reg or const_int
- // $3 = code_label
- // $4 = optional SCRATCH for HI, PSI, SI cases.
-
- const auto &op = recog_data.operand;
-
- extract_insn (insn1);
- rtx xop1[5] = { op[0], op[1], op[2], op[3], op[4] };
- int n_operands = recog_data.n_operands;
-
- // For now, we can optimize cbranches that follow an EQ cbranch,
- // and cbranches that follow the label of a NE cbranch.
-
- if (GET_CODE (xop1[0]) == EQ
- && JUMP_P (next_insn)
- && recog_memoized (next_insn) == icode1)
- {
- // The 2nd cbranch insn follows insn1, i.e. is located in the
- // fallthrough path of insn1.
-
- insn2 = as_a<rtx_jump_insn *> (next_insn);
- }
- else if (GET_CODE (xop1[0]) == NE)
- {
- // insn1 might branch to a label followed by a cbranch.
-
- rtx target1 = JUMP_LABEL (insn1);
- rtx_insn *code_label1 = JUMP_LABEL_AS_INSN (insn1);
- rtx_insn *next = next_nonnote_nondebug_insn (code_label1);
- rtx_insn *barrier = prev_nonnote_nondebug_insn (code_label1);
-
- if (// Target label of insn1 is used exactly once and
- // is not a fallthru, i.e. is preceded by a barrier.
- LABEL_NUSES (target1) == 1
- && barrier
- && BARRIER_P (barrier)
- // Following the target label is a cbranch of the same kind.
- && next
- && JUMP_P (next)
- && recog_memoized (next) == icode1)
- {
- follow_label1 = true;
- insn2 = as_a<rtx_jump_insn *> (next);
- }
- }
-
- if (! insn2)
- continue;
-
- // Also extract operands of insn2, and filter for REG + CONST_INT
- // comparsons against the same register.
-
- extract_insn (insn2);
- rtx xop2[5] = { op[0], op[1], op[2], op[3], op[4] };
-
- if (! rtx_equal_p (xop1[1], xop2[1])
- || ! CONST_INT_P (xop1[2])
- || ! CONST_INT_P (xop2[2]))
- continue;
-
- machine_mode mode = GET_MODE (xop1[1]);
- enum rtx_code code1 = GET_CODE (xop1[0]);
- enum rtx_code code2 = GET_CODE (xop2[0]);
-
- code2 = avr_redundant_compare (code1, xop1[2], code2, xop2[2],
- mode, follow_label1);
- if (code2 == UNKNOWN)
- continue;
-
- //////////////////////////////////////////////////////
- // Found a replacement.
-
- if (dump_file)
- {
- fprintf (dump_file, "\n;; Found chain of jump_insn %d and"
- " jump_insn %d, follow_label1=%d:\n",
- INSN_UID (insn1), INSN_UID (insn2), follow_label1);
- print_rtl_single (dump_file, PATTERN (insn1));
- print_rtl_single (dump_file, PATTERN (insn2));
- }
-
- if (! follow_label1)
- next_insn = next_nonnote_nondebug_insn (insn2);
-
- // Pop the new branch conditions and the new comparison.
- // Prematurely split into compare + branch so that we can drop
- // the 2nd comparison. The following pass, split2, splits all
- // insns for REG_CC, and it should still work as usual even when
- // there are already some REG_CC insns around.
-
- rtx xcond1 = gen_rtx_fmt_ee (code1, VOIDmode, cc_reg_rtx, const0_rtx);
- rtx xcond2 = gen_rtx_fmt_ee (code2, VOIDmode, cc_reg_rtx, const0_rtx);
- rtx xpat1 = gen_branch (xop1[3], xcond1);
- rtx xpat2 = gen_branch (xop2[3], xcond2);
- rtx xcompare = NULL_RTX;
-
- if (mode == QImode)
- {
- gcc_assert (n_operands == 4);
- xcompare = gen_cmpqi3 (xop1[1], xop1[2]);
- }
- else
- {
- gcc_assert (n_operands == 5);
- rtx (*gen_cmp)(rtx,rtx,rtx)
- = mode == HImode ? gen_gen_comparehi
- : mode == PSImode ? gen_gen_comparepsi
- : gen_gen_comparesi; // SImode
- xcompare = gen_cmp (xop1[1], xop1[2], xop1[4]);
- }
-
- // Emit that stuff.
-
- rtx_insn *cmp = emit_insn_before (xcompare, insn1);
- rtx_jump_insn *branch1 = emit_jump_insn_before (xpat1, insn1);
- rtx_jump_insn *branch2 = emit_jump_insn_before (xpat2, insn2);
-
- JUMP_LABEL (branch1) = xop1[3];
- JUMP_LABEL (branch2) = xop2[3];
- // delete_insn() decrements LABEL_NUSES when deleting a JUMP_INSN, but
- // when we pop a new JUMP_INSN, do it by hand.
- ++LABEL_NUSES (xop1[3]);
- ++LABEL_NUSES (xop2[3]);
-
- delete_insn (insn1);
- delete_insn (insn2);
-
- // As a side effect, also recog the new insns.
- gcc_assert (valid_insn_p (cmp));
- gcc_assert (valid_insn_p (branch1));
- gcc_assert (valid_insn_p (branch2));
- } // loop insns
+ return avr_lra_p ? lra_in_progress : reload_in_progress;
}
@@ -1189,17 +505,24 @@ avr_option_override (void)
avr_addr.sp_l = 0x3D + avr_arch->sfr_offset;
avr_addr.sp_h = avr_addr.sp_l + 1;
- init_machine_status = avr_init_machine_status;
+ init_machine_status = []()
+ {
+ return ggc_cleared_alloc<machine_function> ();
+ };
avr_log_set_avr_log();
-}
-/* Function to set up the backend function structure. */
+ /* As long as peep2_rescan is not implemented (see
+ http://gcc.gnu.org/ml/gcc-patches/2011-10/msg02819.html),
+ we add a second peephole2 run to get the best results. */
+ {
+ opt_pass *extra_peephole2
+ = g->get_passes ()->get_pass_peephole2 ()->clone ();
+ register_pass_info peep2_2_info
+ = { extra_peephole2, "peephole2", 1, PASS_POS_INSERT_AFTER };
-static struct machine_function *
-avr_init_machine_status (void)
-{
- return ggc_cleared_alloc<machine_function> ();
+ register_pass (&peep2_2_info);
+ }
}
@@ -1216,7 +539,9 @@ avr_init_expanders (void)
tmp_reg_rtx = all_regs_rtx[AVR_TMP_REGNO];
zero_reg_rtx = all_regs_rtx[AVR_ZERO_REGNO];
- cc_reg_rtx = gen_rtx_REG (CCmode, REG_CC);
+ cc_reg_rtx = gen_rtx_REG (CCmode, REG_CC);
+ ccn_reg_rtx = gen_rtx_REG (CCNmode, REG_CC);
+ cczn_reg_rtx = gen_rtx_REG (CCZNmode, REG_CC);
lpm_addr_reg_rtx = gen_rtx_REG (HImode, REG_Z);
@@ -1239,10 +564,10 @@ avr_init_expanders (void)
/* Implement `REGNO_REG_CLASS'. */
/* Return register class for register R. */
-enum reg_class
+reg_class
avr_regno_reg_class (int r)
{
- static const enum reg_class reg_class_tab[] =
+ static const reg_class reg_class_tab[] =
{
R0_REG,
/* r1 - r15 */
@@ -1896,682 +1221,9 @@ sequent_regs_live (void)
}
-namespace {
-static const pass_data avr_pass_data_fuse_add =
-{
- RTL_PASS, // type
- "", // name (will be patched)
- OPTGROUP_NONE, // optinfo_flags
- TV_DF_SCAN, // tv_id
- 0, // properties_required
- 0, // properties_provided
- 0, // properties_destroyed
- 0, // todo_flags_start
- TODO_df_finish // todo_flags_finish
-};
-
-
-class avr_pass_fuse_add : public rtl_opt_pass
-{
-public:
- avr_pass_fuse_add (gcc::context *ctxt, const char *name)
- : rtl_opt_pass (avr_pass_data_fuse_add, ctxt)
- {
- this->name = name;
- }
-
- virtual bool gate (function *) { return optimize && avr_fuse_add > 0; }
-
- virtual unsigned int execute (function *);
-
- struct Some_Insn
- {
- rtx_insn *insn = nullptr;
- rtx dest, src;
- bool valid () const { return insn != nullptr; }
- void set_deleted ()
- {
- gcc_assert (insn);
- SET_INSN_DELETED (insn);
- insn = nullptr;
- }
- };
-
- // If .insn is not NULL, then this is a reg:HI += const_int
- // of an address register.
- struct Add_Insn : Some_Insn
- {
- rtx addend;
- int regno;
- Add_Insn () {}
- Add_Insn (rtx_insn *insn);
- };
-
- // If .insn is not NULL, then this sets an address register
- // to a constant value.
- struct Ldi_Insn : Some_Insn
- {
- int regno;
- Ldi_Insn () {}
- Ldi_Insn (rtx_insn *insn);
- };
-
- // If .insn is not NULL, then this is a load or store insn where the
- // address is REG or POST_INC with an address register.
- struct Mem_Insn : Some_Insn
- {
- rtx reg_or_0, mem, addr, addr_reg;
- int addr_regno;
- enum rtx_code addr_code;
- machine_mode mode;
- addr_space_t addr_space;
- bool store_p, volatile_p;
- Mem_Insn () {}
- Mem_Insn (rtx_insn *insn);
- };
-
- rtx_insn *fuse_ldi_add (Ldi_Insn &prev_ldi, Add_Insn &add);
- rtx_insn *fuse_add_add (Add_Insn &prev_add, Add_Insn &add);
- rtx_insn *fuse_add_mem (Add_Insn &prev_add, Mem_Insn &mem);
- rtx_insn *fuse_mem_add (Mem_Insn &prev_mem, Add_Insn &add);
-}; // avr_pass_fuse_add
-
-} // anon namespace
-
-rtl_opt_pass *
-make_avr_pass_fuse_add (gcc::context *ctxt)
-{
- return new avr_pass_fuse_add (ctxt, "avr-fuse-add");
-}
-
-/* Describe properties of AVR's indirect load and store instructions
- LD, LDD, ST, STD, LPM, ELPM depending on register number, volatility etc.
- Rules for "volatile" accesses are:
-
- | Xmega | non-Xmega
- ------+-----------------+----------------
- load | read LSB first | read LSB first
- store | write LSB first | write MSB first
-*/
-
-struct AVR_LdSt_Props
-{
- bool has_postinc, has_predec, has_ldd;
- // The insn printers will use POST_INC or PRE_DEC addressing, no matter
- // what adressing modes we are feeding into them.
- bool want_postinc, want_predec;
-
- AVR_LdSt_Props (int regno, bool store_p, bool volatile_p, addr_space_t as)
- {
- bool generic_p = ADDR_SPACE_GENERIC_P (as);
- bool flashx_p = ! generic_p && as != ADDR_SPACE_MEMX;
- has_postinc = generic_p || (flashx_p && regno == REG_Z);
- has_predec = generic_p;
- has_ldd = ! AVR_TINY && generic_p && (regno == REG_Y || regno == REG_Z);
- want_predec = volatile_p && generic_p && ! AVR_XMEGA && store_p;
- want_postinc = volatile_p && generic_p && (AVR_XMEGA || ! store_p);
- want_postinc |= flashx_p && regno == REG_Z;
- }
-
- AVR_LdSt_Props (const avr_pass_fuse_add::Mem_Insn &m)
- : AVR_LdSt_Props (m.addr_regno, m.store_p, m.volatile_p, m.addr_space)
- {
- gcc_assert (m.valid ());
- }
-};
-
-/* Emit a single_set that clobbers REG_CC. */
-
-static rtx_insn *
-emit_move_ccc (rtx dest, rtx src)
-{
- return emit_insn (gen_gen_move_clobbercc (dest, src));
-}
-
-/* Emit a single_set that clobbers REG_CC after insn AFTER. */
-
-static rtx_insn *
-emit_move_ccc_after (rtx dest, rtx src, rtx_insn *after)
-{
- return emit_insn_after (gen_gen_move_clobbercc (dest, src), after);
-}
-
-static bool
-reg_seen_between_p (const_rtx reg, const rtx_insn *from, const rtx_insn *to)
-{
- return (reg_used_between_p (reg, from, to)
- || reg_set_between_p (reg, from, to));
-}
-
-
-static void
-avr_maybe_adjust_cfa (rtx_insn *insn, rtx reg, int addend)
-{
- if (addend
- && frame_pointer_needed
- && REGNO (reg) == FRAME_POINTER_REGNUM
- && avr_fuse_add == 3)
- {
- rtx plus = plus_constant (Pmode, reg, addend);
- RTX_FRAME_RELATED_P (insn) = 1;
- add_reg_note (insn, REG_CFA_ADJUST_CFA, gen_rtx_SET (reg, plus));
- }
-}
-
-
-// If successful, this represents a SET of a pointer register to a constant.
-avr_pass_fuse_add::Ldi_Insn::Ldi_Insn (rtx_insn *insn)
-{
- rtx set = single_set (insn);
- if (!set)
- return;
-
- src = SET_SRC (set);
- dest = SET_DEST (set);
-
- if (REG_P (dest)
- && GET_MODE (dest) == Pmode
- && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z)
- && CONSTANT_P (src))
- {
- this->insn = insn;
- }
-}
-
-// If successful, this represents a PLUS with CONST_INT of a pointer
-// register X, Y or Z. Otherwise, the object is not valid().
-avr_pass_fuse_add::Add_Insn::Add_Insn (rtx_insn *insn)
-{
- rtx set = single_set (insn);
- if (!set)
- return;
-
- src = SET_SRC (set);
- dest = SET_DEST (set);
- if (REG_P (dest)
- // We are only interested in PLUSes that change address regs.
- && GET_MODE (dest) == Pmode
- && IN_RANGE (regno = REGNO (dest), REG_X, REG_Z)
- && PLUS == GET_CODE (src)
- && rtx_equal_p (XEXP (src, 0), dest)
- && CONST_INT_P (XEXP (src, 1)))
- {
- // This is reg:HI += const_int.
- addend = XEXP (src, 1);
- this->insn = insn;
- }
-}
-
-// If successful, this represents a load or store insn where the addressing
-// mode uses pointer register X, Y or Z. Otherwise, the object is not valid().
-avr_pass_fuse_add::Mem_Insn::Mem_Insn (rtx_insn *insn)
-{
- rtx set = single_set (insn);
- if (!set)
- return;
-
- src = SET_SRC (set);
- dest = SET_DEST (set);
- mode = GET_MODE (dest);
-
- if (MEM_P (dest)
- && (REG_P (src) || src == CONST0_RTX (mode)))
- {
- reg_or_0 = src;
- mem = dest;
- }
- else if (REG_P (dest) && MEM_P (src))
- {
- reg_or_0 = dest;
- mem = src;
- }
- else
- return;
-
- if (avr_mem_memx_p (mem)
- || avr_load_libgcc_p (mem))
- return;
-
- addr = XEXP (mem, 0);
- addr_code = GET_CODE (addr);
-
- if (addr_code == REG)
- addr_reg = addr;
- else if (addr_code == POST_INC || addr_code == PRE_DEC)
- addr_reg = XEXP (addr, 0);
- else
- return;
-
- addr_regno = REGNO (addr_reg);
-
- if (avr_fuse_add == 2
- && frame_pointer_needed
- && addr_regno == FRAME_POINTER_REGNUM)
- MEM_VOLATILE_P (mem) = 0;
-
- if (reg_overlap_mentioned_p (reg_or_0, addr) // Can handle CONSTANT_P.
- || addr_regno > REG_Z
- || avr_mem_memx_p (mem)
- // The following optimizations only handle REG and POST_INC,
- // so that's all what we allow here.
- || (addr_code != REG && addr_code != POST_INC))
- return;
-
- addr_space = MEM_ADDR_SPACE (mem);
- volatile_p = MEM_VOLATILE_P (mem);
- store_p = MEM_P (dest);
-
- // Turn this "valid".
- this->insn = insn;
-}
-
-/* Try to combine a Ldi insn with a PLUS CONST_INT addend to one Ldi insn.
- If LDI is valid, then it precedes ADD in the same block.
- When a replacement is found, a new insn is emitted and the old insns
- are pseudo-deleted. The returned insn is the point where the calling
- scanner should continue. When no replacement is found, nullptr is
- returned and nothing changed. */
-
-rtx_insn *
-avr_pass_fuse_add::fuse_ldi_add (Ldi_Insn &ldi, Add_Insn &add)
-{
- if (! ldi.valid ()
- || reg_seen_between_p (ldi.dest, ldi.insn, add.insn))
- {
- // If something is between the Ldi and the current insn, we can
- // set the Ldi invalid to speed future scans.
- return ldi.insn = nullptr;
- }
-
- // Found a Ldi with const and a PLUS insns in the same BB,
- // and with no interfering insns between them.
-
- // Emit new Ldi with the sum of the original offsets after the old Ldi.
- rtx xval = plus_constant (Pmode, ldi.src, INTVAL (add.addend));
-
- rtx_insn *insn = emit_move_ccc_after (ldi.dest, xval, ldi.insn);
- avr_dump (";; new Ldi[%d] insn %d after %d: R%d = %r\n\n", ldi.regno,
- INSN_UID (insn), INSN_UID (ldi.insn), ldi.regno, xval);
-
- rtx_insn *next = NEXT_INSN (add.insn);
- ldi.set_deleted ();
- add.set_deleted ();
-
- return next;
-}
-
-/* Try to combine two PLUS insns with CONST_INT addend to one such insn.
- If PREV_ADD is valid, then it precedes ADD in the same basic block.
- When a replacement is found, a new insn is emitted and the old insns
- are pseudo-deleted. The returned insn is the point where the calling
- scanner should continue. When no replacement is found, nullptr is
- returned and nothing changed. */
-
-rtx_insn *
-avr_pass_fuse_add::fuse_add_add (Add_Insn &prev_add, Add_Insn &add)
-{
- if (! prev_add.valid ()
- || reg_seen_between_p (add.dest, prev_add.insn, add.insn))
- {
- // If something is between the previous Add and the current insn,
- // we can set the previous Add invalid to speed future scans.
- return prev_add.insn = nullptr;
- }
-
- // Found two PLUS insns in the same BB, and with no interfering
- // insns between them.
- rtx plus = plus_constant (Pmode, add.src, INTVAL (prev_add.addend));
-
- rtx_insn *next;
- if (REG_P (plus))
- {
- avr_dump (";; Add[%d] from %d annihilates %d\n\n", add.regno,
- INSN_UID (prev_add.insn), INSN_UID (add.insn));
- next = NEXT_INSN (add.insn);
- }
- else
- {
- // Emit after the current insn, so that it will be picked
- // up as next valid Add insn.
- next = emit_move_ccc_after (add.dest, plus, add.insn);
- avr_dump (";; #1 new Add[%d] insn %d after %d: R%d += %d\n\n",
- add.regno, INSN_UID (next), INSN_UID (add.insn),
- add.regno, (int) INTVAL (XEXP (plus, 1)));
- gcc_assert (GET_CODE (plus) == PLUS);
- }
-
- add.set_deleted ();
- prev_add.set_deleted ();
-
- return next;
-}
-
-/* Try to combine a PLUS of the address register with a load or store insn.
- If ADD is valid, then it precedes MEM in the same basic block.
- When a replacement is found, a new insn is emitted and the old insns
- are pseudo-deleted. The returned insn is the point where the calling
- scanner should continue. When no replacement is found, nullptr is
- returned and nothing changed. */
-
-rtx_insn *
-avr_pass_fuse_add::fuse_add_mem (Add_Insn &add, Mem_Insn &mem)
-{
- if (! add.valid ()
- || reg_seen_between_p (add.dest, add.insn, mem.insn))
- {
- // If something is between the Add and the current insn, we can
- // set the Add invalid to speed future scans.
- return add.insn = nullptr;
- }
-
- AVR_LdSt_Props ap { mem };
-
- int msize = GET_MODE_SIZE (mem.mode);
-
- // The mem insn really wants PRE_DEC.
- bool case1 = ((mem.addr_code == REG || mem.addr_code == POST_INC)
- && msize > 1 && ap.want_predec && ! ap.has_ldd);
-
- // The offset can be consumed by a PRE_DEC.
- bool case2 = (- INTVAL (add.addend) == msize
- && (mem.addr_code == REG || mem.addr_code == POST_INC)
- && ap.has_predec && ! ap.want_postinc);
-
- if (! case1 && ! case2)
- return nullptr;
-
- // Change from REG or POST_INC to PRE_DEC.
- rtx xmem = change_address (mem.mem, mem.mode,
- gen_rtx_PRE_DEC (Pmode, mem.addr_reg));
- rtx dest = mem.store_p ? xmem : mem.reg_or_0;
- rtx src = mem.store_p ? mem.reg_or_0 : xmem;
-
- rtx_insn *next = emit_move_ccc_after (dest, src, mem.insn);
- add_reg_note (next, REG_INC, mem.addr_reg);
- avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", mem.addr_regno,
- INSN_UID (next), INSN_UID (mem.insn), dest, src);
-
- // Changing REG or POST_INC -> PRE_DEC means that the addend before
- // the memory access must be increased by the size of the access,
- rtx plus = plus_constant (Pmode, add.src, msize);
- if (! REG_P (plus))
- {
- rtx_insn *insn = emit_move_ccc_after (add.dest, plus, add.insn);
- avr_dump (";; #2 new Add[%d] insn %d after %d: R%d += %d\n\n",
- add.regno, INSN_UID (insn), INSN_UID (add.insn),
- add.regno, (int) INTVAL (XEXP (plus, 1)));
- gcc_assert (GET_CODE (plus) == PLUS);
- }
- else
- avr_dump (";; Add[%d] insn %d consumed into %d\n\n",
- add.regno, INSN_UID (add.insn), INSN_UID (next));
-
- // Changing POST_INC -> PRE_DEC means that the addend after the mem has to be
- // the size of the access. The hope is that this new add insn may be unused.
- if (mem.addr_code == POST_INC)
- {
- plus = plus_constant (Pmode, add.dest, msize);
- rtx_insn *next2 = emit_move_ccc_after (add.dest, plus, next);
- avr_dump (";; #3 new Add[%d] insn %d after %d: R%d += %d\n\n", add.regno,
- INSN_UID (next2), INSN_UID (next), add.regno, msize);
- next = next2;
- }
-
- add.set_deleted ();
- mem.set_deleted ();
-
- return next;
-}
-
-/* Try to combine a load or store insn with a PLUS of the address register.
- If MEM is valid, then it precedes ADD in the same basic block.
- When a replacement is found, a new insn is emitted and the old insns
- are pseudo-deleted. The returned insn is the point where the calling
- scanner should continue. When no replacement is found, nullptr is
- returned and nothing changed. */
-
-rtx_insn *
-avr_pass_fuse_add::fuse_mem_add (Mem_Insn &mem, Add_Insn &add)
-{
- if (! mem.valid ()
- || reg_seen_between_p (add.dest, mem.insn, add.insn))
- {
- // If something is between the Mem and the current insn, we can
- // set the Mem invalid to speed future scans.
- return mem.insn = nullptr;
- }
-
- AVR_LdSt_Props ap { mem };
-
- int msize = GET_MODE_SIZE (mem.mode);
-
- // The add insn can be consumed by a POST_INC.
- bool case1 = (mem.addr_code == REG
- && INTVAL (add.addend) == msize
- && ap.has_postinc && ! ap.want_predec);
-
- // There are cases where even a partial consumption of the offset is better.
- // This are the cases where no LD+offset addressing is available, because
- // the address register is obviously used after the mem insn, and a mem insn
- // with REG addressing mode will have to restore the address.
- bool case2 = (mem.addr_code == REG
- && msize > 1 && ap.want_postinc && ! ap.has_ldd);
-
- if (! case1 && ! case2)
- return nullptr;
-
- // Change addressing mode from REG to POST_INC.
- rtx xmem = change_address (mem.mem, mem.mode,
- gen_rtx_POST_INC (Pmode, mem.addr_reg));
- rtx dest = mem.store_p ? xmem : mem.reg_or_0;
- rtx src = mem.store_p ? mem.reg_or_0 : xmem;
-
- rtx_insn *insn = emit_move_ccc_after (dest, src, mem.insn);
- add_reg_note (insn, REG_INC, mem.addr_reg);
- avr_dump (";; new Mem[%d] insn %d after %d: %r = %r\n\n", add.regno,
- INSN_UID (insn), INSN_UID (mem.insn), dest, src);
-
- rtx_insn *next = NEXT_INSN (add.insn);
-
- // Changing REG -> POST_INC means that the post addend must be
- // decreased by the size of the access.
- rtx plus = plus_constant (Pmode, add.src, -msize);
- if (! REG_P (plus))
- {
- next = emit_move_ccc_after (mem.addr_reg, plus, add.insn);
- avr_dump (";; #4 new Add[%d] insn %d after %d: R%d += %d\n\n",
- add.regno, INSN_UID (next), INSN_UID (add.insn),
- add.regno, (int) INTVAL (XEXP (plus, 1)));
- gcc_assert (GET_CODE (plus) == PLUS);
- }
- else
- avr_dump (";; Add[%d] insn %d consumed into %d\n\n",
- add.regno, INSN_UID (add.insn), INSN_UID (insn));
-
- add.set_deleted ();
- mem.set_deleted ();
-
- return next;
-}
-
-/* Try to post-reload combine PLUS with CONST_INt of pointer registers with:
- - Sets to a constant address.
- - PLUS insn of that kind.
- - Indirect loads and stores.
- In almost all cases, combine opportunities arise from the preparation
- done by `avr_split_tiny_move', but in some rare cases combinations are
- found for the ordinary cores, too.
- As we consider at most one Mem insn per try, there may still be missed
- optimizations like POST_INC + PLUS + POST_INC might be performed
- as PRE_DEC + PRE_DEC for two adjacent locations. */
-
-unsigned int
-avr_pass_fuse_add::execute (function *func)
-{
- df_note_add_problem ();
- df_analyze ();
-
- int n_add = 0, n_mem = 0, n_ldi = 0;
- basic_block bb;
-
- FOR_EACH_BB_FN (bb, func)
- {
- Ldi_Insn prev_ldi_insns[REG_32];
- Add_Insn prev_add_insns[REG_32];
- Mem_Insn prev_mem_insns[REG_32];
- rtx_insn *insn, *curr;
-
- avr_dump ("\n;; basic block %d\n\n", bb->index);
-
- FOR_BB_INSNS_SAFE (bb, insn, curr)
- {
- rtx_insn *next = nullptr;
- Ldi_Insn ldi_insn { insn };
- Add_Insn add_insn { insn };
- Mem_Insn mem_insn { insn };
-
- if (add_insn.valid ())
- {
- // Found reg:HI += const_int
- avr_dump (";; insn %d: Add[%d]: R%d += %d\n\n",
- INSN_UID (add_insn.insn), add_insn.regno,
- add_insn.regno, (int) INTVAL (add_insn.addend));
- Ldi_Insn &prev_ldi_insn = prev_ldi_insns[add_insn.regno];
- Add_Insn &prev_add_insn = prev_add_insns[add_insn.regno];
- Mem_Insn &prev_mem_insn = prev_mem_insns[add_insn.regno];
- if ((next = fuse_ldi_add (prev_ldi_insn, add_insn)))
- curr = next, n_ldi += 1;
- else if ((next = fuse_add_add (prev_add_insn, add_insn)))
- curr = next, n_add += 1;
- else if ((next = fuse_mem_add (prev_mem_insn, add_insn)))
- curr = next, n_mem += 1;
- else
- prev_add_insn = add_insn;
- }
- else if (mem_insn.valid ())
- {
- int addr_regno = REGNO (mem_insn.addr_reg);
- avr_dump (";; insn %d: Mem[%d]: %r = %r\n\n",
- INSN_UID (mem_insn.insn), addr_regno,
- mem_insn.dest, mem_insn.src);
- Add_Insn &prev_add_insn = prev_add_insns[addr_regno];
- if ((next = fuse_add_mem (prev_add_insn, mem_insn)))
- curr = next, n_mem += 1;
- else
- prev_mem_insns[addr_regno] = mem_insn;
- }
- else if (ldi_insn.valid ())
- {
- if (! CONST_INT_P (ldi_insn.src))
- avr_dump (";; insn %d: Ldi[%d]: R%d = %r\n\n",
- INSN_UID (ldi_insn.insn), ldi_insn.regno,
- ldi_insn.regno, ldi_insn.src);
- prev_ldi_insns[ldi_insn.regno] = ldi_insn;
- }
- } // for insns
- } // for BBs
-
- avr_dump (";; Function %f: Found %d changes: %d ldi, %d add, %d mem.\n",
- n_ldi + n_add + n_mem, n_ldi, n_add, n_mem);
-
- return 0;
-}
-
-
-namespace {
-static const pass_data avr_pass_data_pre_proep =
-{
- RTL_PASS, // type
- "", // name (will be patched)
- OPTGROUP_NONE, // optinfo_flags
- TV_DF_SCAN, // tv_id
- 0, // properties_required
- 0, // properties_provided
- 0, // properties_destroyed
- 0, // todo_flags_start
- 0 // todo_flags_finish
-};
-
-
-class avr_pass_pre_proep : public rtl_opt_pass
-{
-public:
- avr_pass_pre_proep (gcc::context *ctxt, const char *name)
- : rtl_opt_pass (avr_pass_data_pre_proep, ctxt)
- {
- this->name = name;
- }
-
- void compute_maybe_gasisr (function *);
-
- virtual unsigned int execute (function *fun)
- {
- if (avr_gasisr_prologues
- // Whether this function is an ISR worth scanning at all.
- && !fun->machine->is_no_gccisr
- && (fun->machine->is_interrupt
- || fun->machine->is_signal)
- && !cfun->machine->is_naked
- // Paranoia: Non-local gotos and labels that might escape.
- && !cfun->calls_setjmp
- && !cfun->has_nonlocal_label
- && !cfun->has_forced_label_in_static)
- {
- compute_maybe_gasisr (fun);
- }
-
- return 0;
- }
-
-}; // avr_pass_pre_proep
-
-} // anon namespace
-
-rtl_opt_pass *
-make_avr_pass_pre_proep (gcc::context *ctxt)
-{
- return new avr_pass_pre_proep (ctxt, "avr-pre-proep");
-}
-
-
-/* Set fun->machine->gasisr.maybe provided we don't find anything that
- prohibits GAS generating parts of ISR prologues / epilogues for us. */
-
-void
-avr_pass_pre_proep::compute_maybe_gasisr (function *fun)
-{
- // Don't use BB iterators so that we see JUMP_TABLE_DATA.
-
- for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
- {
- // Transparent calls always use [R]CALL and are filtered out by GAS.
- // ISRs don't use -mcall-prologues, hence what remains to be filtered
- // out are open coded (tail) calls.
-
- if (CALL_P (insn))
- return;
-
- // __tablejump2__ clobbers something and is targeted by JMP so
- // that GAS won't see its usage.
-
- if (AVR_HAVE_JMP_CALL
- && JUMP_TABLE_DATA_P (insn))
- return;
-
- // Non-local gotos not seen in *FUN.
-
- if (JUMP_P (insn)
- && find_reg_note (insn, REG_NON_LOCAL_GOTO, NULL_RTX))
- return;
- }
-
- fun->machine->gasisr.maybe = 1;
-}
-
-
/* Obtain the length sequence of insns. */
-int
+static int
get_sequence_length (rtx_insn *insns)
{
int length = 0;
@@ -3478,7 +2130,7 @@ avr_address_tiny_absdata_p (rtx x, machine_mode mode)
static inline bool
avr_reg_ok_for_addr_p (rtx reg, addr_space_t as,
- RTX_CODE outer_code, bool strict)
+ rtx_code outer_code, bool strict)
{
return (REG_P (reg)
&& (avr_regno_mode_code_ok_for_base_p (REGNO (reg), QImode,
@@ -3767,14 +2419,13 @@ ptrreg_to_str (int regno)
return NULL;
}
-/* Return the condition name as a string.
- Used in conditional jump constructing */
+
+/* Return the condition name as a string to be used in a BR** instruction.
+ Used when constructing conditional jumps. */
static const char *
-cond_string (enum rtx_code code)
+avr_cond_string (rtx_code code, bool cc_overflow_unusable)
{
- bool cc_overflow_unusable = false;
-
switch (code)
{
case NE:
@@ -3782,15 +2433,9 @@ cond_string (enum rtx_code code)
case EQ:
return "eq";
case GE:
- if (cc_overflow_unusable)
- return "pl";
- else
- return "ge";
+ return cc_overflow_unusable ? "pl" : "ge";
case LT:
- if (cc_overflow_unusable)
- return "mi";
- else
- return "lt";
+ return cc_overflow_unusable ? "mi" : "lt";
case GEU:
return "sh";
case LTU:
@@ -3820,6 +2465,7 @@ avr_address_tiny_pm_p (rtx x)
return false;
}
+
/* Implement `TARGET_PRINT_OPERAND_ADDRESS'. */
/* Output ADDR to FILE as address. */
@@ -4071,7 +2717,7 @@ avr_print_operand (FILE *file, rtx x, int code)
" with data memory address"))
{
output_addr_const (stderr, x);
- fprintf(stderr,"\n");
+ fprintf (stderr, "\n");
}
/* Use normal symbol for direct address no linker trampoline needed */
output_addr_const (file, x);
@@ -4103,10 +2749,11 @@ avr_print_operand (FILE *file, rtx x, int code)
}
else if (GET_CODE (x) == CONST_STRING)
fputs (XSTR (x, 0), file);
- else if (code == 'j')
- fputs (cond_string (GET_CODE (x)), file);
- else if (code == 'k')
- fputs (cond_string (reverse_condition (GET_CODE (x))), file);
+ else if (code == 'j' || code == 'L')
+ fputs (avr_cond_string (GET_CODE (x), code == 'L'), file);
+ else if (code == 'k' || code == 'K')
+ fputs (avr_cond_string (reverse_condition (GET_CODE (x)), code == 'K'),
+ file);
else
avr_print_operand_address (file, VOIDmode, x);
}
@@ -4121,7 +2768,7 @@ avr_print_operand (FILE *file, rtx x, int code)
static bool
avr_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
unsigned int align,
- enum by_pieces_operation op, bool speed_p)
+ by_pieces_operation op, bool speed_p)
{
if (op != MOVE_BY_PIECES
|| (speed_p && size > MOVE_MAX_PIECES))
@@ -4130,6 +2777,7 @@ avr_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
return size <= MOVE_MAX_PIECES;
}
+
/* Choose mode for jump insn:
1 - relative jump in range -63 <= x <= 62 ;
2 - relative jump in range -2046 <= x <= 2045 ;
@@ -4156,16 +2804,21 @@ avr_jump_mode (rtx x, rtx_insn *insn, int extra)
return 2;
}
-/* Return an AVR condition jump commands.
- X is a comparison RTX.
- LEN is a number returned by avr_jump_mode function.
- If REVERSE nonzero then condition code in X must be reversed. */
+
+/* Return the asm code for conditional branch INSN, where XOP[0] is the jump
+ target label and XOP[1] is a comparison operator of REG_CC against 0. */
const char *
-ret_cond_branch (rtx x, int len, int reverse)
+avr_cond_branch (rtx_insn *insn, rtx *xop)
{
- RTX_CODE cond = reverse ? reverse_condition (GET_CODE (x)) : GET_CODE (x);
- bool cc_overflow_unusable = false;
+ machine_mode ccmode = GET_MODE (XEXP (xop[1], 0));
+ rtx_code cond = GET_CODE (xop[1]);
+ bool cc_overflow_unusable = ccmode != CCmode;
+ int len = avr_jump_mode (xop[0], insn);
+
+ if (ccmode == CCNmode)
+ // The N flag can only do < 0 and >= 0.
+ gcc_assert (cond == GE || cond == LT);
switch (cond)
{
@@ -4227,33 +2880,20 @@ ret_cond_branch (rtx x, int len, int reverse)
"brsh .+4" CR_TAB
"jmp %0"));
default:
- if (reverse)
- {
- switch (len)
- {
- case 1:
- return "br%k1 %0";
- case 2:
- return ("br%j1 .+2" CR_TAB
- "rjmp %0");
- default:
- return ("br%j1 .+4" CR_TAB
- "jmp %0");
- }
- }
- else
+ switch (len)
{
- switch (len)
- {
- case 1:
- return "br%j1 %0";
- case 2:
- return ("br%k1 .+2" CR_TAB
- "rjmp %0");
- default:
- return ("br%k1 .+4" CR_TAB
- "jmp %0");
- }
+ case 1:
+ return cc_overflow_unusable
+ ? "br%L1 %0"
+ : "br%j1 %0";
+ case 2:
+ return cc_overflow_unusable
+ ? "br%K1 .+2" CR_TAB "rjmp %0"
+ : "br%k1 .+2" CR_TAB "rjmp %0";
+ default:
+ return cc_overflow_unusable
+ ? "br%K1 .+4" CR_TAB "jmp %0"
+ : "br%k1 .+4" CR_TAB "jmp %0";
}
}
return "";
@@ -4337,6 +2977,7 @@ avr_init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype, rtx libname,
cfun->machine->sibcall_fails = 0;
}
+
/* Returns the number of registers to allocate for a function argument. */
static int
@@ -4503,6 +3144,152 @@ avr_xload_libgcc_p (machine_mode mode)
}
+/* Return true when INSN has a REG_UNUSED note for hard reg REG.
+ rtlanal.cc::find_reg_note() uses == to compare XEXP (link, 0)
+ and therefore we use a custom function. */
+
+static bool
+avr_insn_has_reg_unused_note_p (rtx_insn *insn, rtx reg)
+{
+ for (rtx link = REG_NOTES (insn); link; link = XEXP (link, 1))
+ if (REG_NOTE_KIND (link) == REG_UNUSED
+ && REG_P (XEXP (link, 0))
+ && REGNO (reg) >= REGNO (XEXP (link, 0))
+ && END_REGNO (reg) <= END_REGNO (XEXP (link, 0)))
+ return true;
+
+ return false;
+}
+
+
+/* A helper for the next function.
+ Return nonzero if REG is not used after INSN.
+ We assume REG is a reload reg, and therefore does
+ not live past labels. It may live past calls or jumps though. */
+
+static bool
+_reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn)
+{
+ if (look_at_insn)
+ {
+ /* If the reg is set by this instruction, then it is safe for our
+ case. Disregard the case where this is a store to memory, since
+ we are checking a register used in the store address. */
+ rtx set = single_set (insn);
+ if (set && !MEM_P (SET_DEST (set))
+ && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ return 1;
+
+ /* This case occurs when fuse-add introduced a POST_INC addressing,
+ but the address register is unused after. */
+ if (set)
+ {
+ rtx mem = MEM_P (SET_SRC (set)) ? SET_SRC (set) : SET_DEST (set);
+ if (MEM_P (mem)
+ && reg_overlap_mentioned_p (reg, XEXP (mem, 0))
+ && avr_insn_has_reg_unused_note_p (insn, reg))
+ return 1;
+ }
+ }
+
+ while ((insn = NEXT_INSN (insn)))
+ {
+ rtx set;
+ rtx_code code = GET_CODE (insn);
+
+#if 0
+ /* If this is a label that existed before reload, then the register
+ is dead here. However, if this is a label added by reorg, then
+ the register may still be live here. We can't tell the difference,
+ so we just ignore labels completely. */
+ if (code == CODE_LABEL)
+ return 1;
+ /* else */
+#endif
+
+ if (!INSN_P (insn))
+ continue;
+
+ if (code == JUMP_INSN)
+ return 0;
+
+ /* If this is a sequence, we must handle them all at once.
+ We could have for instance a call that sets the target register,
+ and an insn in a delay slot that uses the register. In this case,
+ we must return 0. */
+ else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
+ {
+ rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
+ int retval = 0;
+
+ for (int i = 0; i < seq->len (); i++)
+ {
+ rtx_insn *this_insn = seq->insn (i);
+ rtx set = single_set (this_insn);
+
+ if (CALL_P (this_insn))
+ code = CALL_INSN;
+ else if (JUMP_P (this_insn))
+ {
+ if (INSN_ANNULLED_BRANCH_P (this_insn))
+ return 0;
+ code = JUMP_INSN;
+ }
+
+ if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+ return 0;
+ if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ {
+ if (!MEM_P (SET_DEST (set)))
+ retval = 1;
+ else
+ return 0;
+ }
+ if (set == 0
+ && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
+ return 0;
+ }
+ if (retval == 1)
+ return 1;
+ else if (code == JUMP_INSN)
+ return 0;
+ }
+
+ if (code == CALL_INSN)
+ {
+ rtx tem;
+ for (tem = CALL_INSN_FUNCTION_USAGE (insn); tem; tem = XEXP (tem, 1))
+ if (GET_CODE (XEXP (tem, 0)) == USE
+ && REG_P (XEXP (XEXP (tem, 0), 0))
+ && reg_overlap_mentioned_p (reg, XEXP (XEXP (tem, 0), 0)))
+ return 0;
+ if (call_used_or_fixed_reg_p (REGNO (reg)))
+ return 1;
+ }
+
+ set = single_set (insn);
+
+ if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
+ return 0;
+ if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
+ return !MEM_P (SET_DEST (set));
+ if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
+ return 0;
+ }
+ return 1;
+}
+
+
+/* Return nonzero if register REG is dead after INSN. */
+
+int
+reg_unused_after (rtx_insn *insn, rtx reg)
+{
+ return (dead_or_set_p (insn, reg)
+ || (REG_P (reg) && _reg_unused_after (insn, reg, true)));
+}
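
As a much-simplified stand-alone model of the forward scan above (plain C++; real control flow, delay slots and call conventions are ignored):

    #include <vector>

    struct model_insn { bool uses_reg, sets_reg, is_jump; };

    // True iff the register is overwritten or never touched again after
    // position POS; give up (conservatively false) at uses and at jumps.
    static bool model_reg_unused_after (const std::vector<model_insn> &insns,
                                        size_t pos)
    {
      for (size_t i = pos + 1; i < insns.size (); ++i)
        {
          if (insns[i].is_jump || insns[i].uses_reg)
            return false;
          if (insns[i].sets_reg)
            return true;
        }
      return true;
    }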
+
+
/* Fixme: This is a hack because secondary reloads don't work as expected.
Find an unused d-register to be used as scratch in INSN.
@@ -4624,7 +3411,7 @@ avr_out_lpm_no_lpmx (rtx_insn *insn, rtx *xop, int *plen)
for (int i = 0; i < n_bytes; ++i)
{
- rtx reg = simplify_gen_subreg (QImode, dest, GET_MODE (dest), i);
+ rtx reg = avr_byte (dest, i);
if (i > 0)
avr_asm_len ("adiw %2,1", xop, plen, 1);
@@ -4672,7 +3459,7 @@ avr_out_lpm (rtx_insn *insn, rtx *op, int *plen)
}
rtx addr = XEXP (src, 0);
- RTX_CODE code = GET_CODE (addr);
+ rtx_code code = GET_CODE (addr);
gcc_assert (REG_P (dest));
gcc_assert (REG == code || POST_INC == code);
@@ -4848,6 +3635,232 @@ avr_out_xload (rtx_insn * /*insn*/, rtx *op, int *plen)
}
+/* A helper for `output_reload_insisf' and `output_reload_inhi'. */
+/* Set register OP[0] to compile-time constant OP[1].
+ CLOBBER_REG is a QI clobber register or NULL_RTX.
+ LEN == NULL: output instructions.
+ LEN != NULL: set *LEN to the length of the instruction sequence
+ (in words) printed with LEN = NULL.
+ If CLEAR_P is true, OP[0] has been cleared to zero already.
+ If CLEAR_P is false, nothing is known about OP[0].
+
+ The effect on cc0 is as follows:
+
+ Load 0 to any register except ZERO_REG : NONE
+ Load ld register with any value : NONE
+ Anything else : CLOBBER */
+
+static void
+output_reload_in_const (rtx *op, rtx clobber_reg, int *len, bool clear_p)
+{
+ rtx src = op[1];
+ rtx dest = op[0];
+ rtx xval, xdest[4];
+ int ival[4];
+ int clobber_val = 1234;
+ bool cooked_clobber_p = false;
+ bool set_p = false;
+ machine_mode mode = GET_MODE (dest);
+ int n_bytes = GET_MODE_SIZE (mode);
+
+ gcc_assert (REG_P (dest)
+ && CONSTANT_P (src));
+
+ if (len)
+ *len = 0;
+
+ /* (REG:SI 14) is special: It's neither in LD_REGS nor in NO_LD_REGS
+ but has some subregs that are in LD_REGS. Use the MSB (REG:QI 17). */
+
+ if (REGNO (dest) < REG_16
+ && END_REGNO (dest) > REG_16)
+ {
+ clobber_reg = all_regs_rtx[END_REGNO (dest) - 1];
+ }
+
+ /* We might need a clobber reg but don't have one. Look at the value to
+ be loaded more closely. A clobber is only needed if it is a symbol
+ or contains a byte that is neither 0, -1 nor a power of 2. */
+
+ if (NULL_RTX == clobber_reg
+ && !test_hard_reg_class (LD_REGS, dest)
+ && (! (CONST_INT_P (src) || CONST_FIXED_P (src) || CONST_DOUBLE_P (src))
+ || !avr_popcount_each_byte (src, n_bytes,
+ (1 << 0) | (1 << 1) | (1 << 8))))
+ {
+ /* We have no clobber register but need one. Cook one up.
+ That's cheaper than loading from constant pool. */
+
+ cooked_clobber_p = true;
+ clobber_reg = all_regs_rtx[REG_Z + 1];
+ avr_asm_len ("mov __tmp_reg__,%0", &clobber_reg, len, 1);
+ }
+
+ /* Now start filling DEST from LSB to MSB. */
+
+ for (int n = 0; n < n_bytes; n++)
+ {
+ bool done_byte = false;
+ rtx xop[3];
+
+ /* Crop the n-th destination byte. */
+
+ xdest[n] = avr_byte (dest, n);
+ int ldreg_p = test_hard_reg_class (LD_REGS, xdest[n]);
+
+ if (!CONST_INT_P (src)
+ && !CONST_FIXED_P (src)
+ && !CONST_DOUBLE_P (src))
+ {
+ static const char *const asm_code[][2] =
+ {
+ { "ldi %2,lo8(%1)" CR_TAB "mov %0,%2", "ldi %0,lo8(%1)" },
+ { "ldi %2,hi8(%1)" CR_TAB "mov %0,%2", "ldi %0,hi8(%1)" },
+ { "ldi %2,hlo8(%1)" CR_TAB "mov %0,%2", "ldi %0,hlo8(%1)" },
+ { "ldi %2,hhi8(%1)" CR_TAB "mov %0,%2", "ldi %0,hhi8(%1)" }
+ };
+
+ xop[0] = xdest[n];
+ xop[1] = src;
+ xop[2] = clobber_reg;
+
+ avr_asm_len (asm_code[n][ldreg_p], xop, len, ldreg_p ? 1 : 2);
+
+ continue;
+ }
+
+ /* Crop the n-th source byte. */
+
+ xval = avr_byte (src, n);
+ ival[n] = INTVAL (xval);
+
+ /* Look if we can reuse the low word by means of MOVW. */
+
+ if (n == 2
+ && n_bytes >= 4
+ && AVR_HAVE_MOVW)
+ {
+ int lo16 = avr_int16 (src, 0);
+ int hi16 = avr_int16 (src, 2);
+
+ if (lo16 == hi16)
+ {
+ if (lo16 != 0 || ! clear_p)
+ avr_asm_len ("movw %C0,%A0", &op[0], len, 1);
+
+ break;
+ }
+ }
+
+ /* Don't use CLR so that cc0 is set as expected. */
+
+ if (ival[n] == 0)
+ {
+ if (!clear_p)
+ avr_asm_len (ldreg_p ? "ldi %0,0"
+ : AVR_ZERO_REGNO == REGNO (xdest[n]) ? "clr %0"
+ : "mov %0,__zero_reg__",
+ &xdest[n], len, 1);
+ continue;
+ }
+
+ if (clobber_val == ival[n]
+ && REGNO (clobber_reg) == REGNO (xdest[n]))
+ {
+ continue;
+ }
+
+ /* LD_REGS can use LDI to move a constant value */
+
+ if (ldreg_p)
+ {
+ xop[0] = xdest[n];
+ xop[1] = xval;
+ avr_asm_len ("ldi %0,lo8(%1)", xop, len, 1);
+ continue;
+ }
+
+ /* Try to reuse value already loaded in some lower byte. */
+
+ for (int j = 0; j < n; j++)
+ if (ival[j] == ival[n])
+ {
+ xop[0] = xdest[n];
+ xop[1] = xdest[j];
+
+ avr_asm_len ("mov %0,%1", xop, len, 1);
+ done_byte = true;
+ break;
+ }
+
+ if (done_byte)
+ continue;
+
+ /* Need no clobber reg for -1: Use CLR/DEC */
+
+ if (ival[n] == -1)
+ {
+ if (!clear_p)
+ avr_asm_len ("clr %0", &xdest[n], len, 1);
+
+ avr_asm_len ("dec %0", &xdest[n], len, 1);
+ continue;
+ }
+ else if (ival[n] == 1)
+ {
+ if (!clear_p)
+ avr_asm_len ("clr %0", &xdest[n], len, 1);
+
+ avr_asm_len ("inc %0", &xdest[n], len, 1);
+ continue;
+ }
+
+ /* Use T flag or INC to manage powers of 2 if we have
+ no clobber reg. */
+
+ if (NULL_RTX == clobber_reg
+ && single_one_operand (xval, QImode))
+ {
+ xop[0] = xdest[n];
+ xop[1] = GEN_INT (exact_log2 (ival[n] & GET_MODE_MASK (QImode)));
+
+ gcc_assert (constm1_rtx != xop[1]);
+
+ if (!set_p)
+ {
+ set_p = true;
+ avr_asm_len ("set", xop, len, 1);
+ }
+
+ if (!clear_p)
+ avr_asm_len ("clr %0", xop, len, 1);
+
+ avr_asm_len ("bld %0,%1", xop, len, 1);
+ continue;
+ }
+
+ /* We actually need the LD_REGS clobber reg. */
+
+ gcc_assert (NULL_RTX != clobber_reg);
+
+ xop[0] = xdest[n];
+ xop[1] = xval;
+ xop[2] = clobber_reg;
+ clobber_val = ival[n];
+
+ avr_asm_len ("ldi %2,lo8(%1)" CR_TAB
+ "mov %0,%2", xop, len, 2);
+ }
+
+ /* If we cooked up a clobber reg above, restore it. */
+
+ if (cooked_clobber_p)
+ {
+ avr_asm_len ("mov %0,__tmp_reg__", &clobber_reg, len, 1);
+ }
+}
+
+
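
The MOVW shortcut in the loop above ("movw %C0,%A0" when the two 16-bit halves of the constant match) can be checked with a small stand-alone sketch (plain C++, hypothetical names, not GCC code):

    #include <cstdint>
    #include <cstdio>

    int main ()
    {
      uint32_t val = 0x12341234;              // candidate constant
      uint16_t lo16 = val & 0xffff;
      uint16_t hi16 = val >> 16;

      for (int i = 0; i < 4; i++)             // the byte-wise LDI/MOV path
        printf ("byte %d = 0x%02x\n", i, (unsigned) ((val >> (8 * i)) & 0xff));

      if (lo16 == hi16)                       // enables "movw %C0,%A0"
        printf ("high word can be copied from the low word\n");
      return 0;
    }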
const char *
output_movqi (rtx_insn *insn, rtx operands[], int *plen)
{
@@ -4951,7 +3964,7 @@ output_movhi (rtx_insn *insn, rtx xop[], int *plen)
} /* REG_P (src) */
else if (CONSTANT_P (src))
{
- return output_reload_inhi (xop, NULL, plen);
+ return output_reload_inhi (xop, NULL_RTX, plen);
}
else if (MEM_P (src))
{
@@ -5588,7 +4601,7 @@ avr_out_movsi_mr_r_reg_no_disp_tiny (rtx_insn *insn, rtx op[], int *l)
/* "ld r26,-X" is undefined */
if (reg_unused_after (insn, base))
{
- return *l = 7, ("mov __tmp_reg__, %B1" CR_TAB
+ return *l = 7, ("mov __tmp_reg__,%B1" CR_TAB
"st %0,%A1" CR_TAB
TINY_ADIW (%E0, %F0, 1) CR_TAB
"st %0+,__tmp_reg__" CR_TAB
@@ -5597,7 +4610,7 @@ avr_out_movsi_mr_r_reg_no_disp_tiny (rtx_insn *insn, rtx op[], int *l)
}
else
{
- return *l = 9, ("mov __tmp_reg__, %B1" CR_TAB
+ return *l = 9, ("mov __tmp_reg__,%B1" CR_TAB
"st %0,%A1" CR_TAB
TINY_ADIW (%E0, %F0, 1) CR_TAB
"st %0+,__tmp_reg__" CR_TAB
@@ -5695,7 +4708,7 @@ out_movsi_mr_r (rtx_insn *insn, rtx op[], int *l)
{
if (io_address_operand (base, SImode))
{
- return *l=4,("out %i0, %A1" CR_TAB
+ return *l=4,("out %i0,%A1" CR_TAB
"out %i0+1,%B1" CR_TAB
"out %i0+2,%C1" CR_TAB
"out %i0+3,%D1");
@@ -6786,182 +5799,6 @@ out_movhi_mr_r (rtx_insn *insn, rtx op[], int *plen)
}
-/* During reload, we allow much more addresses than Reduced Tiny actually
- supports. Split them after reload in order to get closer to the
- core's capabilities. This sets the stage for pass .avr-fuse-add. */
-
-bool
-avr_split_tiny_move (rtx_insn * /*insn*/, rtx *xop)
-{
- bool store_p = false;
- rtx mem, reg_or_0;
-
- if (REG_P (xop[0]) && MEM_P (xop[1]))
- {
- reg_or_0 = xop[0];
- mem = xop[1];
- }
- else if (MEM_P (xop[0])
- && (REG_P (xop[1])
- || xop[1] == CONST0_RTX (GET_MODE (xop[0]))))
- {
- mem = xop[0];
- reg_or_0 = xop[1];
- store_p = true;
- }
- else
- return false;
-
- machine_mode mode = GET_MODE (mem);
- rtx base, addr = XEXP (mem, 0);
- enum rtx_code addr_code = GET_CODE (addr);
-
- if (REG_P (reg_or_0)
- && reg_overlap_mentioned_p (reg_or_0, addr))
- return false;
- else if (addr_code == PLUS || addr_code == PRE_DEC || addr_code == POST_INC)
- base = XEXP (addr, 0);
- else if (addr_code == REG)
- base = addr;
- else
- return false;
-
- if (REGNO (base) > REG_Z)
- return false;
-
- if (! AVR_TINY
- // Only keep base registers that can't do PLUS addressing.
- && ((REGNO (base) != REG_X
- && ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem)))
- || avr_load_libgcc_p (mem)
- || avr_mem_memx_p (mem)))
- return false;
-
- bool volatile_p = MEM_VOLATILE_P (mem);
- bool mem_volatile_p = false;
- if (frame_pointer_needed
- && REGNO (base) == FRAME_POINTER_REGNUM)
- {
- if (avr_fuse_add < 2
- // Be a projection (we always split PLUS).
- || (avr_fuse_add == 2 && volatile_p && addr_code != PLUS))
- return false;
-
- // Changing the frame pointer locally may confuse later passes
- // like .dse2 which don't track changes of FP, not even when
- // respective CFA notes are present. An example is pr22141-1.c.
- if (avr_fuse_add == 2)
- mem_volatile_p = true;
- }
-
- enum rtx_code new_code = UNKNOWN;
- HOST_WIDE_INT add = 0, sub = 0;
- int msize = GET_MODE_SIZE (mode);
-
- AVR_LdSt_Props ap { REGNO (base), store_p, volatile_p, ADDR_SPACE_GENERIC };
-
- switch (addr_code)
- {
- default:
- return false;
-
- case PLUS:
- add = INTVAL (XEXP (addr, 1));
- if (msize == 1)
- {
- new_code = REG;
- sub = -add;
- }
- else if (ap.want_predec)
- {
- // volatile stores prefer PRE_DEC (MSB first)
- sub = -add;
- add += msize;
- new_code = PRE_DEC;
- }
- else
- {
- new_code = POST_INC;
- sub = -add - msize;
- }
- break;
-
- case POST_INC:
- // volatile stores prefer PRE_DEC (MSB first)
- if (msize > 1 && ap.want_predec)
- {
- add = msize;
- new_code = PRE_DEC;
- sub = msize;
- break;
- }
- return false;
-
- case PRE_DEC:
- // volatile loads prefer POST_INC (LSB first)
- if (msize > 1 && ap.want_postinc)
- {
- add = -msize;
- new_code = POST_INC;
- sub = -msize;
- break;
- }
- return false;
-
- case REG:
- if (msize == 1)
- return false;
-
- if (ap.want_predec)
- {
- add = msize;
- new_code = PRE_DEC;
- sub = 0;
- }
- else
- {
- add = 0;
- new_code = POST_INC;
- sub = -msize;
- }
- break;
- } // switch addr_code
-
- rtx_insn *insn;
-
- if (add)
- {
- insn = emit_move_ccc (base, plus_constant (Pmode, base, add));
- avr_maybe_adjust_cfa (insn, base, add);
- }
-
- rtx new_addr = new_code == REG
- ? base
- : gen_rtx_fmt_e (new_code, Pmode, base);
-
- rtx new_mem = change_address (mem, mode, new_addr);
- if (mem_volatile_p)
- MEM_VOLATILE_P (new_mem) = 1;
-
- insn = emit_move_ccc (store_p ? new_mem : reg_or_0,
- store_p ? reg_or_0 : new_mem);
- if (auto_inc_p (new_addr))
- {
- add_reg_note (insn, REG_INC, base);
- int off = new_code == POST_INC ? msize : -msize;
- avr_maybe_adjust_cfa (insn, base, off);
- }
-
- if (sub)
- {
- insn = emit_move_ccc (base, plus_constant (Pmode, base, sub));
- avr_maybe_adjust_cfa (insn, base, sub);
- }
-
- return true;
-}
-
-
/* Implement `TARGET_FRAME_POINTER_REQUIRED'. */
/* Return 1 if frame pointer for current function required. */
@@ -6982,7 +5819,7 @@ avr_frame_pointer_required_p (void)
For now, just look at the next insn, which misses some opportunities like
following jumps. */
-static RTX_CODE
+static rtx_code
compare_condition (rtx_insn *insn)
{
rtx set;
@@ -7009,7 +5846,7 @@ compare_condition (rtx_insn *insn)
static bool
compare_sign_p (rtx_insn *insn)
{
- RTX_CODE cond = compare_condition (insn);
+ rtx_code cond = compare_condition (insn);
return (cond == GE || cond == LT);
}
@@ -7019,7 +5856,7 @@ compare_sign_p (rtx_insn *insn)
static bool
compare_eq_p (rtx_insn *insn)
{
- RTX_CODE cond = compare_condition (insn);
+ rtx_code cond = compare_condition (insn);
return (cond == EQ || cond == NE);
}
@@ -7032,7 +5869,7 @@ compare_eq_p (rtx_insn *insn)
static void
avr_canonicalize_comparison (int *icode, rtx *op0, rtx *op1, bool op0_fixed)
{
- enum rtx_code code = (enum rtx_code) *icode;
+ rtx_code code = (rtx_code) *icode;
machine_mode mode = GET_MODE (*op0);
bool signed_p = code == GT || code == LE;
@@ -7105,18 +5942,116 @@ avr_canonicalize_comparison (int *icode, rtx *op0, rtx *op1, bool op0_fixed)
}
}
-/* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return SFmode or DFmode
- for TI_{LONG_,}DOUBLE_TYPE which is for {long,} double type, go with
- the default one for the others. */
-static machine_mode
-avr_c_mode_for_floating_type (enum tree_index ti)
+/* Try to turn a GEU or LTU comparison of register XOP[1] into an
+ NE / EQ comparison of the higher bytes of XOP[1] against 0.
+ XOP[1] has scalar int or scalar fixed-point mode of 2, 3 or 4 bytes.
+ XOP[2] is a compile-time constant, and XOP[0] = XOP[1] <comp> XOP[2]
+ is the comparison operator. XOP[3] is the branch label, and XOP[4]
+ is a QImode scratch operand.
+ When XOP[2] (viewed as a CONST_INT) is an integral power of 256,
+ then a GEU or LTU comparison can be turned into a NE or EQ comparison
+ of the high bytes against zero. For example, the C code
+
+ if (x >= 1)
+ ccc = 0;
+
+ where x is an unsigned _Accum may be compiled as:
+
+ or r24,r25 ; *cmpsi_lsr
+ breq .L1 ; branch
+ sts ccc,__zero_reg__ ; movqi_insn
+ .L1:
+
+ In the case of success, the operands will be such that they comprise
+ a *cmp<mode>_lsr insn, where mode is HI, PSI or SI, and XOP[0] will be
+ a NE or EQ branch condition. Otherwise, XOP[] is unchanged. */
+
+void
+avr_maybe_cmp_lsr (rtx *xop)
{
- if (ti == TI_DOUBLE_TYPE)
- return avr_double == 32 ? SFmode : DFmode;
- if (ti == TI_LONG_DOUBLE_TYPE)
- return avr_long_double == 32 ? SFmode : DFmode;
- return default_mode_for_floating_type (ti);
+ rtx_code comp = GET_CODE (xop[0]);
+
+ if ((comp == GEU || comp == LTU)
+ && (CONST_INT_P (xop[2]) || CONST_FIXED_P (xop[2])))
+ {
+ rtx xreg = avr_to_int_mode (xop[1]);
+ rtx xval = avr_to_int_mode (xop[2]);
+ machine_mode imode = GET_MODE (xreg);
+ auto uval = UINTVAL (xval) & GET_MODE_MASK (imode);
+ int shift = exact_log2 (uval);
+
+ if (shift == 8 || shift == 16 || shift == 24)
+ {
+ // Operands such that the compare becomes *cmp<mode>_lsr.
+ xop[1] = gen_rtx_LSHIFTRT (imode, xreg, GEN_INT (shift));
+ xop[2] = const0_rtx;
+ xop[4] = gen_rtx_SCRATCH (QImode);
+ // Branch condition.
+ xop[0] = gen_rtx_fmt_ee (comp == GEU ? NE : EQ,
+ VOIDmode, xop[1], xop[2]);
+ }
+ }
+}
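
At the source level, the rewrite above relies on the fact that an unsigned comparison against a power of 256 is a test of the high bytes: for a 16-bit unsigned x, x >= 256 holds exactly when (x >> 8) != 0. A small stand-alone check (plain C++, illustrative only):

    #include <cassert>
    #include <cstdint>

    int main ()
    {
      for (uint32_t x = 0; x <= 0xffff; ++x)
        assert ((x >= 256) == ((x >> 8) != 0));   // GEU 0x100 vs. NE of high byte
      return 0;
    }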
+
+
+/* Output an EQ / NE compare of HI, PSI or SI register XOP[0] against 0,
+ where only the bits starting at XOP[1] are relevant. XOP[1] is a
+ const_int that is 8, 16 or 24. Return "".
+ PLEN == 0: Output instructions.
+ PLEN != 0: Set *PLEN to the length of the sequence in words. */
+
+const char *
+avr_out_cmp_lsr (rtx_insn *insn, rtx *xop, int *plen)
+{
+ rtx xreg = xop[0];
+ const int n_bytes = GET_MODE_SIZE (GET_MODE (xreg));
+ const int shift = INTVAL (xop[1]);
+ const rtx_code cond = compare_condition (insn);
+
+ gcc_assert (shift == 8 || shift == 16 || shift == 24);
+ gcc_assert (shift < 8 * n_bytes);
+ gcc_assert (cond == UNKNOWN || cond == NE || cond == EQ);
+
+ const bool used_p = ! reg_unused_after (insn, xreg);
+
+ if (plen)
+ *plen = 0;
+
+ if (shift / 8 == n_bytes - 1)
+ {
+ rtx xmsb = avr_byte (xreg, n_bytes - 1);
+ avr_asm_len ("tst %0", &xmsb, plen, 1);
+ }
+ else if (n_bytes == 4
+ && shift <= 16
+ && AVR_HAVE_ADIW
+ && REGNO (xreg) >= REG_22
+ // The sequence also works when xreg is unused after,
+ // but SBIW is slower than OR.
+ && used_p)
+ {
+ avr_asm_len ("sbiw %C0,0", &xreg, plen, 1);
+ if (shift == 8)
+ avr_asm_len ("cpc %B0,__zero_reg__", &xreg, plen, 1);
+ }
+ else
+ {
+ rtx op[2] = { avr_byte (xreg, shift / 8), tmp_reg_rtx };
+ if (used_p)
+ {
+ avr_asm_len ("mov %1,%0", op, plen, 1);
+ op[0] = tmp_reg_rtx;
+ }
+
+ for (int i = 1 + shift / 8; i < n_bytes; ++i)
+ {
+ op[1] = avr_byte (xreg, i);
+ avr_asm_len ("or %0,%1", op, plen, 1);
+ }
+ }
+
+ return "";
}
@@ -7153,9 +6088,6 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
xval = avr_to_int_mode (xop[1]);
}
- /* MODE of the comparison. */
- machine_mode mode = GET_MODE (xreg);
-
gcc_assert (REG_P (xreg));
gcc_assert ((CONST_INT_P (xval) && n_bytes <= 4)
|| (const_double_operand (xval, VOIDmode) && n_bytes == 8));
@@ -7163,13 +6095,16 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
if (plen)
*plen = 0;
+ const rtx_code cond = compare_condition (insn);
+ const bool eqne_p = cond == EQ || cond == NE;
+
/* Comparisons == +/-1 and != +/-1 can be done similarly to comparing
against 0 by ORing the bytes. This is one instruction shorter.
Notice that 64-bit comparisons are always against reg:ALL8 18 (ACC_A)
and therefore don't use this. */
- if (!test_hard_reg_class (LD_REGS, xreg)
- && compare_eq_p (insn)
+ if (eqne_p
+ && ! test_hard_reg_class (LD_REGS, xreg)
&& reg_unused_after (insn, xreg))
{
if (xval == const1_rtx)
@@ -7198,69 +6133,65 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
}
}
- /* Comparisons == -1 and != -1 of a d-register that's used after the
- comparison. (If it's unused after we use CPI / SBCI or ADIW sequence
- from below.) Instead of CPI Rlo,-1 / LDI Rx,-1 / CPC Rhi,Rx we can
- use CPI Rlo,-1 / CPC Rhi,Rlo which is 1 instruction shorter:
- If CPI is true then Rlo contains -1 and we can use Rlo instead of Rx
- when CPC'ing the high part. If CPI is false then CPC cannot render
- the result to true. This also works for the more generic case where
- the constant is of the form 0xabab. */
+ /* Comparisons == and != may change the order in which the sub-bytes are
+ being compared. Start with the high 16 bits so we can use SBIW. */
- if (n_bytes == 2
- && xval != const0_rtx
- && test_hard_reg_class (LD_REGS, xreg)
- && compare_eq_p (insn)
- && !reg_unused_after (insn, xreg))
+ if (n_bytes == 4
+ && eqne_p
+ && AVR_HAVE_ADIW
+ && REGNO (xreg) >= REG_22
+ && (xval == const0_rtx
+ || (IN_RANGE (avr_int16 (xval, 2), 0, 63)
+ && reg_unused_after (insn, xreg))))
{
- rtx xlo8 = simplify_gen_subreg (QImode, xval, mode, 0);
- rtx xhi8 = simplify_gen_subreg (QImode, xval, mode, 1);
+ xop[2] = avr_word (xval, 2);
+ return avr_asm_len ("sbiw %C0,%2" CR_TAB
+ "sbci %B0,hi8(%1)" CR_TAB
+ "sbci %A0,lo8(%1)", xop, plen, 3);
+ }
- if (INTVAL (xlo8) == INTVAL (xhi8))
- {
- xop[0] = xreg;
- xop[1] = xlo8;
+ bool changed[8] = { 0, 0, 0, 0, 0, 0, 0, 0 };
- return avr_asm_len ("cpi %A0,%1" CR_TAB
- "cpc %B0,%A0", xop, plen, 2);
- }
- }
+ /* The >= and < comparisons may skip the lower bytes when the corresponding
+ bytes of the constant are all zeros. In that case, the comparison may
+ start at a byte other than the LSB. */
- for (int i = 0; i < n_bytes; i++)
+ const int start = ((cond == GEU || cond == LTU || cond == GE || cond == LT)
+ && INTVAL (xval) != 0)
+ ? ctz_hwi (INTVAL (xval)) / 8
+ : 0;
+
+ for (int i = start; i < n_bytes; i++)
{
/* We compare byte-wise. */
- rtx reg8 = simplify_gen_subreg (QImode, xreg, mode, i);
- rtx xval8 = simplify_gen_subreg (QImode, xval, mode, i);
+ xop[0] = avr_byte (xreg, i);
+ xop[1] = avr_byte (xval, i);
/* 8-bit value to compare with this byte. */
- unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode);
-
- /* Registers R16..R31 can operate with immediate. */
- bool ld_reg_p = test_hard_reg_class (LD_REGS, reg8);
-
- xop[0] = reg8;
- xop[1] = gen_int_mode (val8, QImode);
+ unsigned int val8 = avr_uint8 (xval, i);
/* Word registers >= R24 can use SBIW/ADIW with 0..63. */
- if (i == 0
- && avr_adiw_reg_p (reg8))
+ if (i == start
+ && i % 2 == 0
+ && n_bytes - start >= 2
+ && avr_adiw_reg_p (xop[0]))
{
- int val16 = trunc_int_for_mode (INTVAL (xval), HImode);
+ int val16 = avr_int16 (xval, i);
if (IN_RANGE (val16, 0, 63)
&& (val8 == 0
|| reg_unused_after (insn, xreg)))
{
avr_asm_len ("sbiw %0,%1", xop, plen, 1);
-
+ changed[i] = changed[i + 1] = val8 != 0;
i++;
continue;
}
- if (n_bytes == 2
- && IN_RANGE (val16, -63, -1)
- && compare_eq_p (insn)
+ if (IN_RANGE (val16, -63, -1)
+ && eqne_p
+ && n_bytes - start == 2
&& reg_unused_after (insn, xreg))
{
return avr_asm_len ("adiw %0,%n1", xop, plen, 1);
@@ -7271,7 +6202,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
if (val8 == 0)
{
- avr_asm_len (i == 0
+ avr_asm_len (i == start
? "cp %0,__zero_reg__"
: "cpc %0,__zero_reg__", xop, plen, 1);
continue;
@@ -7282,9 +6213,9 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
instruction; the only difference is that comparisons don't write
the result back to the target register. */
- if (ld_reg_p)
+ if (test_hard_reg_class (LD_REGS, xop[0]))
{
- if (i == 0)
+ if (i == start)
{
avr_asm_len ("cpi %0,%1", xop, plen, 1);
continue;
@@ -7292,10 +6223,37 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
else if (reg_unused_after (insn, xreg))
{
avr_asm_len ("sbci %0,%1", xop, plen, 1);
+ changed[i] = true;
continue;
}
}
+ /* When byte comparisons for an EQ or NE comparison look like
+ compare (x[i], C)
+ compare (x[j], C)
+ then we can instead use
+ compare (x[i], C)
+ compare (x[j], x[i])
+ which is shorter, and the outcome of the comparison is the same. */
+
+ if (eqne_p)
+ {
+ bool found = false;
+
+ for (int j = start; j < i && ! found; ++j)
+ if (val8 == avr_uint8 (xval, j)
+ // Make sure that we didn't clobber x[j] above.
+ && ! changed[j])
+ {
+ rtx op[] = { xop[0], avr_byte (xreg, j) };
+ avr_asm_len ("cpc %0,%1", op, plen, 1);
+ found = true;
+ }
+
+ if (found)
+ continue;
+ }
+
/* Must load the value into the scratch register. */
gcc_assert (REG_P (xop[2]));
@@ -7304,7 +6262,7 @@ avr_out_compare (rtx_insn *insn, rtx *xop, int *plen)
avr_asm_len ("ldi %2,%1", xop, plen, 1);
clobber_val = (int) val8;
- avr_asm_len (i == 0
+ avr_asm_len (i == start
? "cp %0,%2"
: "cpc %0,%2", xop, plen, 1);
}
@@ -7405,7 +6363,7 @@ avr_out_tstsi (rtx_insn *insn, rtx *op, int *plen)
PLEN == 0: Print instructions. */
const char *
-avr_out_cmp_ext (rtx xop[], enum rtx_code code, int *plen)
+avr_out_cmp_ext (rtx xop[], rtx_code code, int *plen)
{
// The smaller reg is the one that's to be extended. Get its index as z.
int z = GET_MODE_SIZE (GET_MODE (xop[1])) < GET_MODE_SIZE (GET_MODE (xop[0]));
@@ -7425,7 +6383,7 @@ avr_out_cmp_ext (rtx xop[], enum rtx_code code, int *plen)
{
// Sign-extend the high-byte of zreg to tmp_reg.
int zmsb = GET_MODE_SIZE (zmode) - 1;
- rtx xzmsb = simplify_gen_subreg (QImode, zreg, zmode, zmsb);
+ rtx xzmsb = avr_byte (zreg, zmsb);
avr_asm_len ("mov __tmp_reg__,%0" CR_TAB
"rol __tmp_reg__" CR_TAB
@@ -7448,10 +6406,8 @@ avr_out_cmp_ext (rtx xop[], enum rtx_code code, int *plen)
for (int b = 1; b < n_bytes; ++b)
{
rtx regs[2];
- regs[1 - z] = simplify_gen_subreg (QImode, reg, mode, b);
- regs[z] = (b < GET_MODE_SIZE (zmode)
- ? simplify_gen_subreg (QImode, zreg, zmode, b)
- : zex);
+ regs[1 - z] = avr_byte (reg, b);
+ regs[z] = b < GET_MODE_SIZE (zmode) ? avr_byte (zreg, b) : zex;
avr_asm_len ("cpc %0,%1", regs, plen, 1);
}
@@ -8994,6 +7950,34 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len)
}
+/* When INSN is a PARALLEL with two SETs, a SET of REG_CC and a SET of a
+ GPR, then return the second SET and set *CCMODE to the first SET's mode.
+ Otherwise, return single_set and set *CCMODE to VOIDmode. */
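+
+/* Illustrative shape of the PARALLEL case:
+ (parallel [(set (reg:CCZN REG_CC)
+ (compare:CCZN ... (const_int 0)))
+ (set (reg:HI 24) (plus:HI ...))])
+ Here the PLUS set is returned and *CCMODE is set to CCZNmode. */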
+
+static rtx
+avr_cc_set (rtx_insn *insn, machine_mode *ccmode)
+{
+ // single_set() not only depends on the anatomy of an insn but also
+ // on REG_UNUSED notes, thus we have to analyze by hand so that the
+ // result only depends on the pattern.
+
+ rtx pat = PATTERN (insn);
+
+ if (GET_CODE (pat) == PARALLEL
+ && XVECLEN (pat, 0) == 2
+ && GET_CODE (XVECEXP (pat, 0, 0)) == SET
+ && GET_CODE (XVECEXP (pat, 0, 1)) == SET)
+ {
+ rtx ccset = XVECEXP (pat, 0, 0);
+ *ccmode = GET_MODE (SET_DEST (ccset));
+ return XVECEXP (pat, 0, 1);
+ }
+
+ *ccmode = VOIDmode;
+ return single_set (insn);
+}
+
+
/* Output addition of registers YOP[0] and YOP[1]
YOP[0] += extend (YOP[1])
@@ -9002,8 +7986,11 @@ lshrsi3_out (rtx_insn *insn, rtx operands[], int *len)
YOP[0] -= extend (YOP[2])
- where the integer modes satisfy SI >= YOP[0].mode > YOP[1/2].mode >= QI,
- and the extension may be sign- or zero-extend. Returns "".
+ where the integer modes satisfy SI >= YOP[0].mode >= YOP[1/2].mode >= QI,
+ and the extension may be sign-extend, zero-extend or reg (no extend).
+ INSN is either a single_set or a true parallel insn. In the latter case,
+ INSN has two SETs: A SET of REG_CC and a SET like in the single_set case.
+ Returns "".
If PLEN == NULL output the instructions.
If PLEN != NULL set *PLEN to the length of the sequence in words. */
@@ -9012,19 +7999,24 @@ const char *
avr_out_plus_ext (rtx_insn *insn, rtx *yop, int *plen)
{
rtx regs[2];
+ machine_mode ccmode;
+
+ /* Ouch! Whether or not an insn is a single_set depends not only
+ on the anatomy of the pattern, but also on REG_UNUSED notes.
+ Hence we have to dig by hand... */
- const rtx src = SET_SRC (single_set (insn));
- const RTX_CODE add = GET_CODE (src);
+ const rtx src = SET_SRC (avr_cc_set (insn, &ccmode));
+ const rtx_code add = GET_CODE (src);
gcc_assert (GET_CODE (src) == PLUS || GET_CODE (src) == MINUS);
// Use XOP[] in the remainder with XOP[0] = YOP[0] and XOP[1] = YOP[1/2].
rtx xop[2] = { yop[0], yop[add == PLUS ? 1 : 2] };
const rtx xreg = XEXP (src, add == PLUS ? 1 : 0);
const rtx xext = XEXP (src, add == PLUS ? 0 : 1);
- const RTX_CODE ext = GET_CODE (xext);
+ const rtx_code ext = GET_CODE (xext);
gcc_assert (REG_P (xreg)
- && (ext == ZERO_EXTEND || ext == SIGN_EXTEND));
+ && (ext == ZERO_EXTEND || ext == SIGN_EXTEND || ext == REG));
const int n_bytes0 = GET_MODE_SIZE (GET_MODE (xop[0]));
const int n_bytes1 = GET_MODE_SIZE (GET_MODE (xop[1]));
@@ -9044,7 +8036,9 @@ avr_out_plus_ext (rtx_insn *insn, rtx *yop, int *plen)
if (ext == SIGN_EXTEND
&& (n_bytes0 > 1 + n_bytes1
- || reg_overlap_mentioned_p (msb1, xop[0])))
+ || reg_overlap_mentioned_p (msb1, xop[0])
+ // The insn also wants to set SREG.N and SREG.Z.
+ || ccmode == CCZNmode))
{
// Sign-extending more than one byte: Set tmp_reg to 0 or -1
// depending on $1.msb. Same for the pathological case where
@@ -9111,8 +8105,8 @@ avr_out_plus_ext (rtx_insn *insn, rtx *yop, int *plen)
fixed-point rounding, cf. `avr_out_round'. */
static void
-avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code,
- enum rtx_code code_sat, int sign, bool out_label)
+avr_out_plus_1 (rtx insn, rtx *xop, int *plen, rtx_code code,
+ rtx_code code_sat, int sign, bool out_label)
{
/* MODE of the operation. */
machine_mode mode = GET_MODE (xop[0]);
@@ -9150,8 +8144,8 @@ avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code,
for (int i = 0; i < n_bytes; i++)
{
/* We operate byte-wise on the destination. */
- op[0] = simplify_gen_subreg (QImode, xop[0], mode, i);
- op[1] = simplify_gen_subreg (QImode, xop[2], mode, i);
+ op[0] = avr_byte (xop[0], i);
+ op[1] = avr_byte (xop[2], i);
if (i == 0)
avr_asm_len (code == PLUS ? "add %0,%1" : "sub %0,%1",
@@ -9187,8 +8181,7 @@ avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code,
if (SS_PLUS == code_sat && MINUS == code
&& sign < 0
- && 0x80 == (INTVAL (simplify_gen_subreg (QImode, xval, imode, n_bytes-1))
- & GET_MODE_MASK (QImode)))
+ && 0x80 == avr_uint8 (xval, n_bytes - 1))
{
/* We compute x + 0x80 by means of SUB instructions. We negated the
constant subtrahend above and are left with x - (-128) so that we
@@ -9197,7 +8190,7 @@ avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code,
where this must be done is when NEG overflowed in case [2s] because
the V computation needs the right sign of the subtrahend. */
- rtx msb = simplify_gen_subreg (QImode, xop[0], mode, n_bytes - 1);
+ rtx msb = avr_byte (xop[0], n_bytes - 1);
avr_asm_len ("subi %0,128" CR_TAB
"brmi 0f", &msb, plen, 2);
@@ -9209,8 +8202,8 @@ avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code,
for (int i = 0; i < n_bytes; i++)
{
/* We operate byte-wise on the destination. */
- rtx reg8 = simplify_gen_subreg (QImode, xop[0], mode, i);
- rtx xval8 = simplify_gen_subreg (QImode, xval, imode, i);
+ rtx reg8 = avr_byte (xop[0], i);
+ rtx xval8 = avr_byte (xval, i);
/* 8-bit value to operate with this byte. */
unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode);
@@ -9228,8 +8221,7 @@ avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code,
&& i + 2 <= n_bytes
&& avr_adiw_reg_p (reg8))
{
- rtx xval16 = simplify_gen_subreg (HImode, xval, imode, i);
- unsigned int val16 = UINTVAL (xval16) & GET_MODE_MASK (HImode);
+ unsigned int val16 = avr_uint16 (xval, i);
/* Registers R24, X, Y, Z can use ADIW/SBIW with constants < 64
i.e. operate word-wise. */
@@ -9384,10 +8376,8 @@ avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code,
The cases a - b actually perform a - (-(-b)) if B is CONST.
*/
- op[0] = simplify_gen_subreg (QImode, xop[0], mode, n_bytes-1);
- op[1] = n_bytes > 1
- ? simplify_gen_subreg (QImode, xop[0], mode, n_bytes-2)
- : NULL_RTX;
+ op[0] = avr_byte (xop[0], n_bytes - 1);
+ op[1] = n_bytes > 1 ? avr_byte (xop[0], n_bytes - 2) : NULL_RTX;
bool need_copy = true;
int len_call = 1 + AVR_HAVE_JMP_CALL;
@@ -9420,7 +8410,7 @@ avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code,
{
/* [1s,reg] */
- op[2] = simplify_gen_subreg (QImode, xop[2], mode, n_bytes-1);
+ op[2] = avr_byte (xop[2], n_bytes - 1);
if (n_bytes == 1)
avr_asm_len ("ldi %0,0x80" CR_TAB
@@ -9436,7 +8426,7 @@ avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code,
{
/* [3s,reg] */
- op[2] = simplify_gen_subreg (QImode, xop[2], mode, n_bytes-1);
+ op[2] = avr_byte (xop[2], n_bytes - 1);
if (n_bytes == 1)
avr_asm_len ("ldi %0,0x7f" CR_TAB
@@ -9577,7 +8567,7 @@ avr_out_plus_1 (rtx insn, rtx *xop, int *plen, enum rtx_code code,
are additions/subtraction for pointer modes, i.e. HImode and PSImode. */
static const char *
-avr_out_plus_symbol (rtx *xop, enum rtx_code code, int *plen)
+avr_out_plus_symbol (rtx *xop, rtx_code code, int *plen)
{
machine_mode mode = GET_MODE (xop[0]);
@@ -9633,8 +8623,8 @@ avr_out_plus (rtx insn, rtx *xop, int *plen, bool out_label)
machine_mode mode = GET_MODE (xdest);
scalar_int_mode imode = int_mode_for_mode (mode).require ();
int n_bytes = GET_MODE_SIZE (mode);
- enum rtx_code code_sat = GET_CODE (SET_SRC (xpattern));
- enum rtx_code code
+ rtx_code code_sat = GET_CODE (SET_SRC (xpattern));
+ rtx_code code
= (PLUS == code_sat || SS_PLUS == code_sat || US_PLUS == code_sat
? PLUS : MINUS);
@@ -9677,8 +8667,7 @@ avr_out_plus (rtx insn, rtx *xop, int *plen, bool out_label)
/* Saturation will need the sign of the original operand. */
- rtx xmsb = simplify_gen_subreg (QImode, op[2], imode, n_bytes-1);
- int sign = INTVAL (xmsb) < 0 ? -1 : 1;
+ int sign = avr_int8 (op[2], n_bytes - 1) < 0 ? -1 : 1;
/* If we subtract and the subtrahend is a constant, then negate it
so that avr_out_plus_1 can be used. */
@@ -9702,10 +8691,51 @@ avr_out_plus (rtx insn, rtx *xop, int *plen, bool out_label)
}
+/* Output an addition that sets SREG.N:
+
+ XOP[0] += XOP[1]
+
+ where XOP[0] is a HI, PSI or SI register, and XOP[1] is a register or a
+ compile-time constant. XOP[2] is SCRATCH or a QI clobber reg. Return "".
+
+ If PLEN == NULL output the instructions.
+ If PLEN != NULL set *PLEN to the length of the sequence in words. */
+
+const char *
+avr_out_plus_set_N (rtx *xop, int *plen)
+{
+ gcc_assert (xop[1] != const0_rtx);
+
+ // The output function for vanilla additions, avr_out_plus_1, can be
+ // used because it always issues an operation on the MSB (except when
+ // the addend is zero).
+
+ rtx op[] = { xop[0], xop[0], xop[1], xop[2] };
+
+ if (REG_P (xop[1]))
+ {
+ avr_out_plus_1 (NULL_RTX, op, plen, PLUS, UNKNOWN, 0, false);
+ }
+ else
+ {
+ int len_plus, len_minus;
+
+ avr_out_plus_1 (NULL_RTX, op, &len_plus, PLUS, UNKNOWN, 0, false);
+ avr_out_plus_1 (NULL_RTX, op, &len_minus, MINUS, UNKNOWN, 0, false);
+
+ avr_out_plus_1 (NULL_RTX, op, plen, len_minus < len_plus ? MINUS : PLUS,
+ UNKNOWN, 0, false);
+ }
+
+ return "";
+}
+
+
/* Output an instruction sequence for addition of REG in XOP[0] and CONST_INT
in XOP[1] in such a way that SREG.Z and SREG.N are set according to the
- result. XOP[2] might be a d-regs clobber register. If XOP[2] is SCRATCH,
- then the addition can be performed without a clobber reg. Return "".
+ result. The mode is HI, PSI or SI. XOP[2] might be a d-regs clobber
+ register. If XOP[2] is SCRATCH, then the addition can be performed
+ without a clobber reg. Return "".
If PLEN == NULL, then output the instructions.
If PLEN != NULL, then set *PLEN to the length of the sequence in words. */
@@ -9725,21 +8755,12 @@ avr_out_plus_set_ZN (rtx *xop, int *plen)
// Number of bytes to operate on.
int n_bytes = GET_MODE_SIZE (mode);
- if (n_bytes == 1)
- {
- if (INTVAL (xval) == 1)
- return avr_asm_len ("inc %0", xop, plen, 1);
-
- if (INTVAL (xval) == -1)
- return avr_asm_len ("dec %0", xop, plen, 1);
- }
-
if (n_bytes == 2
&& avr_adiw_reg_p (xreg)
&& IN_RANGE (INTVAL (xval), 1, 63))
{
// Add 16-bit value in [1..63] to a w register.
- return avr_asm_len ("adiw %0, %1", xop, plen, 1);
+ return avr_asm_len ("adiw %0,%1", xop, plen, 1);
}
// Addition won't work; subtract the negative of XVAL instead.
@@ -9758,17 +8779,17 @@ avr_out_plus_set_ZN (rtx *xop, int *plen)
// SBIW'ed in one go.
for (int i = 0; i < n_bytes; ++i)
{
- op[0] = simplify_gen_subreg (QImode, xreg, mode, i);
+ op[0] = avr_byte (xreg, i);
if (i == 0
&& n_bytes >= 2
&& avr_adiw_reg_p (op[0]))
{
- op[1] = simplify_gen_subreg (HImode, xval, mode, 0);
+ op[1] = avr_word (xval, 0);
if (IN_RANGE (INTVAL (op[1]), 0, 63))
{
// SBIW can handle the lower 16 bits.
- avr_asm_len ("sbiw %0, %1", op, plen, 1);
+ avr_asm_len ("sbiw %0,%1", op, plen, 1);
// Next byte has already been handled: Skip it.
++i;
@@ -9776,14 +8797,14 @@ avr_out_plus_set_ZN (rtx *xop, int *plen)
}
}
- op[1] = simplify_gen_subreg (QImode, xval, mode, i);
+ op[1] = avr_byte (xval, i);
if (test_hard_reg_class (LD_REGS, op[0]))
{
// d-regs can subtract immediates.
avr_asm_len (i == 0
- ? "subi %0, %1"
- : "sbci %0, %1", op, plen, 1);
+ ? "subi %0,%1"
+ : "sbci %0,%1", op, plen, 1);
}
else
{
@@ -9792,8 +8813,8 @@ avr_out_plus_set_ZN (rtx *xop, int *plen)
{
// Any register can subtract 0.
avr_asm_len (i == 0
- ? "sub %0, __zero_reg__"
- : "sbc %0, __zero_reg__", op, plen, 1);
+ ? "sub %0,__zero_reg__"
+ : "sbc %0,__zero_reg__", op, plen, 1);
}
else
{
@@ -9803,13 +8824,13 @@ avr_out_plus_set_ZN (rtx *xop, int *plen)
{
// Load partial xval to QI clobber reg and memoize for later.
gcc_assert (REG_P (op[2]));
- avr_asm_len ("ldi %2, %1", op, plen, 1);
+ avr_asm_len ("ldi %2,%1", op, plen, 1);
clobber_val = val8;
}
avr_asm_len (i == 0
- ? "sub %0, %2"
- : "sbc %0, %2", op, plen, 1);
+ ? "sub %0,%2"
+ : "sbc %0,%2", op, plen, 1);
}
}
} // Loop bytes.
@@ -9818,6 +8839,136 @@ avr_out_plus_set_ZN (rtx *xop, int *plen)
}
+/* A helper worker for `op8_ZN_operator'. Allow
+
+ OP0 <code> OP1
+
+ QImode operations that set SREG.N and SREG.Z in a usable way.
+ The requirements are:
+
+ * OP0 is a QImode register, and
+ * OP1 is a QImode register or CONST_INT, and
+
+ the allowed operation is one of:
+
+ * SHIFTs with a const_int offset in { 1, 2, 3 }.
+ * MINUS and XOR with a register operand
+ * IOR and AND with a register operand, or d-reg + const_int
+ * PLUS with a register operand, or d-reg + const_int,
+ or a const_int in { -2, -1, 1, 2 }. */
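+
+/* For illustration: (plus:QI (reg:QI 2) (const_int -1)) is accepted because
+ the addend is in { -2, -1, 1, 2 }, whereas (and:QI (reg:QI 2) (const_int 15))
+ is rejected since R2 is no d-register and the mask is not a register. */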
+
+bool
+avr_op8_ZN_operator (rtx op)
+{
+ const rtx_code code = GET_CODE (op);
+ rtx op0 = XEXP (op, 0);
+ rtx op1 = XEXP (op, 1);
+
+ if (! register_operand (op0, QImode)
+ || ! (register_operand (op1, QImode)
+ || const_int_operand (op1, QImode)))
+ return false;
+
+ const bool reg1_p = REG_P (op1);
+ const bool ld_reg0_p = test_hard_reg_class (LD_REGS, op0);
+
+ switch (code)
+ {
+ default:
+ break;
+
+ case ASHIFT:
+ case ASHIFTRT:
+ case LSHIFTRT:
+ return const_1_to_3_operand (op1, QImode);
+
+ case MINUS:
+ case XOR:
+ return reg1_p;
+
+ case IOR:
+ case AND:
+ return reg1_p || ld_reg0_p;
+
+ case PLUS:
+ return reg1_p || ld_reg0_p || abs1_abs2_operand (op1, QImode);
+ }
+
+ return false;
+}
+
+
+/* Output a QImode instruction sequence for
+
+ XOP[0] = XOP[0] <CODE> XOP[2]
+
+ where XOP[0] is a register, and the possible operands and CODEs
+ are according to `avr_op8_ZN_operator' from above. Return "".
+
+ If PLEN == NULL, then output the instructions.
+ If PLEN != NULL, then set *PLEN to the length of the sequence in words. */
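+
+/* Illustrative examples: CODE == PLUS with XOP[2] == -2 on a register
+ outside LD_REGS emits "dec %0" twice; CODE == ASHIFT with XOP[2] == 3
+ emits "lsl %0" three times. */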
+
+const char *
+avr_out_op8_set_ZN (rtx_code code, rtx *xop, int *plen)
+{
+ const bool reg2_p = REG_P (xop[2]);
+ const int ival = CONST_INT_P (xop[2]) ? (int) INTVAL (xop[2]) : 0;
+
+ gcc_assert (op8_ZN_operator (gen_rtx_fmt_ee (code, QImode, xop[0], xop[2]),
+ QImode));
+ if (plen)
+ *plen = 0;
+
+ const char *tpl = nullptr;
+ int times = 1;
+
+ if (code == ASHIFT)
+ tpl = "lsl %0", times = ival;
+ else if (code == LSHIFTRT)
+ tpl = "lsr %0", times = ival;
+ else if (code == ASHIFTRT)
+ tpl = "asr %0", times = ival;
+ else if (code == MINUS)
+ tpl = "sub %0,%2";
+ else if (code == XOR)
+ tpl = "eor %0,%2";
+ else if (code == AND)
+ tpl = reg2_p ? "and %0,%2" : "andi %0,lo8(%2)";
+ else if (code == IOR)
+ tpl = reg2_p ? "or %0,%2" : "ori %0,lo8(%2)";
+ else if (code == PLUS)
+ {
+ if (ival
+ && ! test_hard_reg_class (LD_REGS, xop[0]))
+ {
+ tpl = ival > 0 ? "inc %0" : "dec %0";
+ times = std::abs (ival);
+ }
+ else
+ tpl = reg2_p ? "add %0,%2" : "subi %0,lo8(%n2)";
+ }
+ else
+ gcc_unreachable ();
+
+ for (int i = 0; i < times; ++i)
+ avr_asm_len (tpl, xop, plen, 1);
+
+ return "";
+}
+
+
+/* Used in the "length" attribute of insn "*op8.for.cczn.<code>". */
+
+int
+avr_len_op8_set_ZN (rtx_code code, rtx *xop)
+{
+ int len;
+ (void) avr_out_op8_set_ZN (code, xop, &len);
+
+ return len;
+}
+
+
/* Output bit operation (IOR, AND, XOR) with register XOP[0] and compile
time constant XOP[2]:
@@ -9834,7 +8985,7 @@ avr_out_bitop (rtx insn, rtx *xop, int *plen)
{
/* CODE and MODE of the operation. */
rtx xpattern = INSN_P (insn) ? single_set (as_a <rtx_insn *> (insn)) : insn;
- enum rtx_code code = GET_CODE (SET_SRC (xpattern));
+ rtx_code code = GET_CODE (SET_SRC (xpattern));
machine_mode mode = GET_MODE (xop[0]);
/* Number of bytes to operate on. */
@@ -9861,11 +9012,10 @@ avr_out_bitop (rtx insn, rtx *xop, int *plen)
for (int i = 0; i < n_bytes; i++)
{
/* We operate byte-wise on the destination. */
- rtx reg8 = simplify_gen_subreg (QImode, xop[0], mode, i);
- rtx xval8 = simplify_gen_subreg (QImode, xop[2], mode, i);
+ rtx reg8 = avr_byte (xop[0], i);
/* 8-bit value to operate with this byte. */
- unsigned int val8 = UINTVAL (xval8) & GET_MODE_MASK (QImode);
+ unsigned int val8 = avr_uint8 (xop[2], i);
/* Number of bits set in the current byte of the constant. */
int pop8 = popcount_hwi (val8);
@@ -9984,12 +9134,12 @@ void
avr_emit_xior_with_shift (rtx_insn *insn, rtx *xop, int bitoff)
{
rtx src = SET_SRC (single_set (insn));
- RTX_CODE xior = GET_CODE (src);
+ rtx_code xior = GET_CODE (src);
gcc_assert (xior == XOR || xior == IOR);
gcc_assert (bitoff % 8 == 0);
// Work out the shift offset in bytes; negative for shift right.
- RTX_CODE shift = GET_CODE (XEXP (src, 0));
+ rtx_code shift = GET_CODE (XEXP (src, 0));
int byteoff = 0?0
: shift == ASHIFT ? bitoff / 8
: shift == LSHIFTRT ? -bitoff / 8
@@ -10214,7 +9364,7 @@ avr_out_insv (rtx_insn *insn, rtx xop[], int *plen)
}
// Any of ASHIFT, LSHIFTRT, ASHIFTRT.
- enum rtx_code code = GET_CODE (XEXP (xsrc, 0));
+ rtx_code code = GET_CODE (XEXP (xsrc, 0));
int shift = code == ASHIFT ? INTVAL (xop2) : -INTVAL (xop2);
// Determines the position of the output bit.
@@ -10231,11 +9381,9 @@ avr_out_insv (rtx_insn *insn, rtx xop[], int *plen)
rtx op[4] =
{
// Output
- simplify_gen_subreg (QImode, xop[0], mode, obit / 8),
- GEN_INT (obit & 7),
+ avr_byte (xop[0], obit / 8), GEN_INT (obit & 7),
// Input
- simplify_gen_subreg (QImode, xop[1], mode, ibit / 8),
- GEN_INT (ibit & 7)
+ avr_byte (xop[1], ibit / 8), GEN_INT (ibit & 7)
};
obit &= 7;
ibit &= 7;
@@ -10322,7 +9470,7 @@ avr_out_insv (rtx_insn *insn, rtx xop[], int *plen)
{
for (int b = 0; b < n_bytes; ++b)
{
- rtx byte = simplify_gen_subreg (QImode, xop[0], mode, b);
+ rtx byte = avr_byte (xop[0], b);
if (REGNO (byte) != REGNO (op[0]))
avr_asm_len ("clr %0", &byte, plen, 1);
}
@@ -10342,7 +9490,7 @@ avr_out_insv (rtx_insn *insn, rtx xop[], int *plen)
else
for (int b = 0; b < n_bytes; ++b)
{
- rtx byte = simplify_gen_subreg (QImode, xop[0], mode, b);
+ rtx byte = avr_byte (xop[0], b);
avr_asm_len ("clr %0", &byte, plen, 1);
}
@@ -10366,7 +9514,7 @@ avr_out_extr (rtx_insn *insn, rtx xop[], int *plen)
if (GET_MODE (src) != QImode)
{
- src = xop[1] = simplify_gen_subreg (QImode, src, GET_MODE (src), bit / 8);
+ src = xop[1] = avr_byte (src, bit / 8);
bit %= 8;
xop[2] = GEN_INT (bit);
}
@@ -10493,7 +9641,7 @@ const char *
avr_out_fract (rtx_insn *insn, rtx operands[], bool intsigned, int *plen)
{
rtx xop[6];
- RTX_CODE shift = UNKNOWN;
+ rtx_code shift = UNKNOWN;
bool sign_in_carry = false;
bool msb_in_carry = false;
bool lsb_in_tmp_reg = false;
@@ -11251,7 +10399,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len)
/* Read from insn attribute "adjust_len" if/how length is to be adjusted. */
- enum attr_adjust_len adjust_len = get_attr_adjust_len (insn);
+ attr_adjust_len adjust_len = get_attr_adjust_len (insn);
if (adjust_len == ADJUST_LEN_NO)
{
@@ -11301,6 +10449,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len)
case ADJUST_LEN_COMPARE64: avr_out_compare64 (insn, op, &len); break;
case ADJUST_LEN_CMP_UEXT: avr_out_cmp_ext (op, ZERO_EXTEND, &len); break;
case ADJUST_LEN_CMP_SEXT: avr_out_cmp_ext (op, SIGN_EXTEND, &len); break;
+ case ADJUST_LEN_CMP_LSR: avr_out_cmp_lsr (insn, op, &len); break;
case ADJUST_LEN_LSHRQI: lshrqi3_out (insn, op, &len); break;
case ADJUST_LEN_LSHRHI: lshrhi3_out (insn, op, &len); break;
@@ -11322,6 +10471,7 @@ avr_adjust_insn_length (rtx_insn *insn, int len)
case ADJUST_LEN_INSERT_BITS: avr_out_insert_bits (op, &len); break;
case ADJUST_LEN_ADD_SET_ZN: avr_out_plus_set_ZN (op, &len); break;
+ case ADJUST_LEN_ADD_SET_N: avr_out_plus_set_N (op, &len); break;
case ADJUST_LEN_INSV_NOTBIT: avr_out_insert_notbit (insn, op, &len); break;
@@ -11333,151 +10483,6 @@ avr_adjust_insn_length (rtx_insn *insn, int len)
}
-/* Return true when INSN has a REG_UNUSED note for hard reg REG.
- rtlanal.cc::find_reg_note() uses == to compare XEXP (link, 0)
- therefore use a custom function. */
-
-static bool
-avr_insn_has_reg_unused_note_p (rtx_insn *insn, rtx reg)
-{
- for (rtx link = REG_NOTES (insn); link; link = XEXP (link, 1))
- if (REG_NOTE_KIND (link) == REG_UNUSED
- && REG_P (XEXP (link, 0))
- && REGNO (reg) >= REGNO (XEXP (link, 0))
- && END_REGNO (reg) <= END_REGNO (XEXP (link, 0)))
- return true;
-
- return false;
-}
-
-
-/* Return nonzero if register REG dead after INSN. */
-
-int
-reg_unused_after (rtx_insn *insn, rtx reg)
-{
- return (dead_or_set_p (insn, reg)
- || (REG_P (reg) && _reg_unused_after (insn, reg, true)));
-}
-
-/* A helper for the previous function.
- Return nonzero if REG is not used after INSN.
- We assume REG is a reload reg, and therefore does
- not live past labels. It may live past calls or jumps though. */
-
-bool
-_reg_unused_after (rtx_insn *insn, rtx reg, bool look_at_insn)
-{
- if (look_at_insn)
- {
- /* If the reg is set by this instruction, then it is safe for our
- case. Disregard the case where this is a store to memory, since
- we are checking a register used in the store address. */
- rtx set = single_set (insn);
- if (set && !MEM_P (SET_DEST (set))
- && reg_overlap_mentioned_p (reg, SET_DEST (set)))
- return 1;
-
- /* This case occurs when fuse-add introduced a POST_INC addressing,
- but the address register is unused after. */
- if (set)
- {
- rtx mem = MEM_P (SET_SRC (set)) ? SET_SRC (set) : SET_DEST (set);
- if (MEM_P (mem)
- && reg_overlap_mentioned_p (reg, XEXP (mem, 0))
- && avr_insn_has_reg_unused_note_p (insn, reg))
- return 1;
- }
- }
-
- while ((insn = NEXT_INSN (insn)))
- {
- rtx set;
- enum rtx_code code = GET_CODE (insn);
-
-#if 0
- /* If this is a label that existed before reload, then the register
- if dead here. However, if this is a label added by reorg, then
- the register may still be live here. We can't tell the difference,
- so we just ignore labels completely. */
- if (code == CODE_LABEL)
- return 1;
- /* else */
-#endif
-
- if (!INSN_P (insn))
- continue;
-
- if (code == JUMP_INSN)
- return 0;
-
- /* If this is a sequence, we must handle them all at once.
- We could have for instance a call that sets the target register,
- and an insn in a delay slot that uses the register. In this case,
- we must return 0. */
- else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
- {
- rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
- int retval = 0;
-
- for (int i = 0; i < seq->len (); i++)
- {
- rtx_insn *this_insn = seq->insn (i);
- rtx set = single_set (this_insn);
-
- if (CALL_P (this_insn))
- code = CALL_INSN;
- else if (JUMP_P (this_insn))
- {
- if (INSN_ANNULLED_BRANCH_P (this_insn))
- return 0;
- code = JUMP_INSN;
- }
-
- if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
- return 0;
- if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
- {
- if (!MEM_P (SET_DEST (set)))
- retval = 1;
- else
- return 0;
- }
- if (set == 0
- && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
- return 0;
- }
- if (retval == 1)
- return 1;
- else if (code == JUMP_INSN)
- return 0;
- }
-
- if (code == CALL_INSN)
- {
- rtx tem;
- for (tem = CALL_INSN_FUNCTION_USAGE (insn); tem; tem = XEXP (tem, 1))
- if (GET_CODE (XEXP (tem, 0)) == USE
- && REG_P (XEXP (XEXP (tem, 0), 0))
- && reg_overlap_mentioned_p (reg, XEXP (XEXP (tem, 0), 0)))
- return 0;
- if (call_used_or_fixed_reg_p (REGNO (reg)))
- return 1;
- }
-
- set = single_set (insn);
-
- if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
- return 0;
- if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
- return !MEM_P (SET_DEST (set));
- if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
- return 0;
- }
- return 1;
-}
-
-
/* Implement `TARGET_ASM_INTEGER'. */
/* Target hook for assembling integer objects. The AVR version needs
special handling for references to certain labels. */
@@ -11517,10 +10522,7 @@ avr_assemble_integer (rtx x, unsigned int size, int aligned_p)
/* varasm fails to handle big fixed modes that don't fit in hwi. */
for (unsigned n = 0; n < size; n++)
- {
- rtx xn = simplify_gen_subreg (QImode, x, GET_MODE (x), n);
- default_assemble_integer (xn, 1, aligned_p);
- }
+ default_assemble_integer (avr_byte (x, n), 1, aligned_p);
return true;
}
@@ -11540,7 +10542,7 @@ avr_assemble_integer (rtx x, unsigned int size, int aligned_p)
static unsigned char
avr_class_max_nregs (reg_class_t rclass, machine_mode mode)
{
- if (rclass == CC_REG && mode == CCmode)
+ if (rclass == CC_REG && GET_MODE_CLASS (mode) == MODE_CC)
return 1;
return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
@@ -12602,6 +11604,7 @@ avr_asm_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
return sect;
}
+
/* Implement `TARGET_ASM_FILE_START'. */
/* Outputs some text at the start of each assembler file. */
@@ -12846,16 +11849,16 @@ avr_cbranch_cost (rtx x)
}
-/* Mutually recursive subroutine of avr_rtx_cost for calculating the
+/* Mutually recursive subroutine of `avr_rtx_cost' for calculating the
cost of an RTX operand given its context. X is the rtx of the
operand, MODE is its mode, and OUTER is the rtx_code of this
operand's parent operator. */
static int
-avr_operand_rtx_cost (rtx x, machine_mode mode, enum rtx_code outer,
+avr_operand_rtx_cost (rtx x, machine_mode mode, rtx_code outer,
int opno, bool speed)
{
- enum rtx_code code = GET_CODE (x);
+ rtx_code code = GET_CODE (x);
switch (code)
{
@@ -12887,7 +11890,7 @@ static bool
avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
int /*opno*/, int *total, bool speed)
{
- enum rtx_code code = GET_CODE (x);
+ rtx_code code = GET_CODE (x);
HOST_WIDE_INT val;
switch (code)
@@ -13208,8 +12211,8 @@ avr_rtx_costs_1 (rtx x, machine_mode mode, int outer_code,
{
rtx op0 = XEXP (x, 0);
rtx op1 = XEXP (x, 1);
- enum rtx_code code0 = GET_CODE (op0);
- enum rtx_code code1 = GET_CODE (op1);
+ rtx_code code0 = GET_CODE (op0);
+ rtx_code code1 = GET_CODE (op1);
bool ex0 = SIGN_EXTEND == code0 || ZERO_EXTEND == code0;
bool ex1 = SIGN_EXTEND == code1 || ZERO_EXTEND == code1;
@@ -13899,7 +12902,7 @@ avr_insn_cost (rtx_insn *insn, bool speed)
subrtx_iterator::array_type array;
FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
{
- enum rtx_code code = GET_CODE (*iter);
+ rtx_code code = GET_CODE (*iter);
not_bit_p |= code == NOT || code == XOR || code == GE;
}
@@ -13992,8 +12995,8 @@ extra_constraint_Q (rtx x)
/* Convert condition code CONDITION to the valid AVR condition code. */
-RTX_CODE
-avr_normalize_condition (RTX_CODE condition)
+rtx_code
+avr_normalize_condition (rtx_code condition)
{
switch (condition)
{
@@ -14068,7 +13071,7 @@ avr_function_value (const_tree type, const_tree /*fn_decl_or_type*/,
}
int
-test_hard_reg_class (enum reg_class rclass, rtx x)
+test_hard_reg_class (reg_class rclass, rtx x)
{
int regno = true_regnum (x);
if (regno < 0)
@@ -14081,7 +13084,7 @@ test_hard_reg_class (enum reg_class rclass, rtx x)
}
-/* Helper for jump_over_one_insn_p: Test if INSN is a 2-word instruction
+/* Helper for `jump_over_one_insn_p': Test if INSN is a 2-word instruction
and thus is suitable to be skipped by CPSE, SBRC, etc. */
static bool
@@ -14095,7 +13098,10 @@ avr_2word_insn_p (rtx_insn *insn)
switch (INSN_CODE (insn))
{
default:
- return false;
+ return (recog_memoized (insn) >= 0
+ // Transparent calls may be skipped.
+ && (get_attr_type (insn) == TYPE_XCALL
+ || get_attr_adjust_len (insn) == ADJUST_LEN_CALL));
case CODE_FOR_movqi_insn:
case CODE_FOR_movuqq_insn:
@@ -14154,7 +13160,7 @@ jump_over_one_insn_p (rtx_insn *insn, rtx dest)
static unsigned int
avr_hard_regno_nregs (unsigned int regno, machine_mode mode)
{
- if (regno == REG_CC && mode == CCmode)
+ if (regno == REG_CC && GET_MODE_CLASS (mode) == MODE_CC)
return 1;
return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
@@ -14169,7 +13175,7 @@ static bool
avr_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
{
if (regno == REG_CC)
- return mode == CCmode;
+ return GET_MODE_CLASS (mode) == MODE_CC;
/* NOTE: 8-bit values must not be disallowed for R28 or R29.
Disallowing QI et al. in these regs might lead to code like
@@ -14227,9 +13233,9 @@ avr_hard_regno_call_part_clobbered (unsigned, unsigned regno,
/* Implement `MODE_CODE_BASE_REG_CLASS'. */
-enum reg_class
+reg_class
avr_mode_code_base_reg_class (machine_mode /*mode*/, addr_space_t as,
- RTX_CODE outer_code, RTX_CODE /*index_code*/)
+ rtx_code outer_code, rtx_code /*index_code*/)
{
if (!ADDR_SPACE_GENERIC_P (as))
{
@@ -14252,8 +13258,8 @@ avr_mode_code_base_reg_class (machine_mode /*mode*/, addr_space_t as,
bool
avr_regno_mode_code_ok_for_base_p (int regno, machine_mode /*mode*/,
- addr_space_t as, RTX_CODE outer_code,
- RTX_CODE /*index_code*/)
+ addr_space_t as, rtx_code outer_code,
+ rtx_code /*index_code*/)
{
bool ok = false;
@@ -14316,232 +13322,6 @@ avr_regno_mode_code_ok_for_base_p (int regno, machine_mode /*mode*/,
}
-/* A helper for `output_reload_insisf' and `output_reload_inhi'. */
-/* Set 32-bit register OP[0] to compile-time constant OP[1].
- CLOBBER_REG is a QI clobber register or NULL_RTX.
- LEN == NULL: output instructions.
- LEN != NULL: set *LEN to the length of the instruction sequence
- (in words) printed with LEN = NULL.
- If CLEAR_P is true, OP[0] had been cleard to Zero already.
- If CLEAR_P is false, nothing is known about OP[0].
-
- The effect on cc0 is as follows:
-
- Load 0 to any register except ZERO_REG : NONE
- Load ld register with any value : NONE
- Anything else: : CLOBBER */
-
-static void
-output_reload_in_const (rtx *op, rtx clobber_reg, int *len, bool clear_p)
-{
- rtx src = op[1];
- rtx dest = op[0];
- rtx xval, xdest[4];
- int ival[4];
- int clobber_val = 1234;
- bool cooked_clobber_p = false;
- bool set_p = false;
- machine_mode mode = GET_MODE (dest);
- int n_bytes = GET_MODE_SIZE (mode);
-
- gcc_assert (REG_P (dest)
- && CONSTANT_P (src));
-
- if (len)
- *len = 0;
-
- /* (REG:SI 14) is special: It's neither in LD_REGS nor in NO_LD_REGS
- but has some subregs that are in LD_REGS. Use the MSB (REG:QI 17). */
-
- if (REGNO (dest) < REG_16
- && REGNO (dest) + GET_MODE_SIZE (mode) > REG_16)
- {
- clobber_reg = all_regs_rtx[REGNO (dest) + n_bytes - 1];
- }
-
- /* We might need a clobber reg but don't have one. Look at the value to
- be loaded more closely. A clobber is only needed if it is a symbol
- or contains a byte that is neither 0, -1 or a power of 2. */
-
- if (NULL_RTX == clobber_reg
- && !test_hard_reg_class (LD_REGS, dest)
- && (! (CONST_INT_P (src) || CONST_FIXED_P (src) || CONST_DOUBLE_P (src))
- || !avr_popcount_each_byte (src, n_bytes,
- (1 << 0) | (1 << 1) | (1 << 8))))
- {
- /* We have no clobber register but need one. Cook one up.
- That's cheaper than loading from constant pool. */
-
- cooked_clobber_p = true;
- clobber_reg = all_regs_rtx[REG_Z + 1];
- avr_asm_len ("mov __tmp_reg__,%0", &clobber_reg, len, 1);
- }
-
- /* Now start filling DEST from LSB to MSB. */
-
- for (int n = 0; n < n_bytes; n++)
- {
- bool done_byte = false;
- rtx xop[3];
-
- /* Crop the n-th destination byte. */
-
- xdest[n] = simplify_gen_subreg (QImode, dest, mode, n);
- int ldreg_p = test_hard_reg_class (LD_REGS, xdest[n]);
-
- if (!CONST_INT_P (src)
- && !CONST_FIXED_P (src)
- && !CONST_DOUBLE_P (src))
- {
- static const char *const asm_code[][2] =
- {
- { "ldi %2,lo8(%1)" CR_TAB "mov %0,%2", "ldi %0,lo8(%1)" },
- { "ldi %2,hi8(%1)" CR_TAB "mov %0,%2", "ldi %0,hi8(%1)" },
- { "ldi %2,hlo8(%1)" CR_TAB "mov %0,%2", "ldi %0,hlo8(%1)" },
- { "ldi %2,hhi8(%1)" CR_TAB "mov %0,%2", "ldi %0,hhi8(%1)" }
- };
-
- xop[0] = xdest[n];
- xop[1] = src;
- xop[2] = clobber_reg;
-
- avr_asm_len (asm_code[n][ldreg_p], xop, len, ldreg_p ? 1 : 2);
-
- continue;
- }
-
- /* Crop the n-th source byte. */
-
- xval = simplify_gen_subreg (QImode, src, mode, n);
- ival[n] = INTVAL (xval);
-
- /* Look if we can reuse the low word by means of MOVW. */
-
- if (n == 2
- && n_bytes >= 4
- && AVR_HAVE_MOVW)
- {
- rtx lo16 = simplify_gen_subreg (HImode, src, mode, 0);
- rtx hi16 = simplify_gen_subreg (HImode, src, mode, 2);
-
- if (INTVAL (lo16) == INTVAL (hi16))
- {
- if (INTVAL (lo16) != 0 || !clear_p)
- avr_asm_len ("movw %C0,%A0", &op[0], len, 1);
-
- break;
- }
- }
-
- /* Don't use CLR so that cc0 is set as expected. */
-
- if (ival[n] == 0)
- {
- if (!clear_p)
- avr_asm_len (ldreg_p ? "ldi %0,0"
- : AVR_ZERO_REGNO == REGNO (xdest[n]) ? "clr %0"
- : "mov %0,__zero_reg__",
- &xdest[n], len, 1);
- continue;
- }
-
- if (clobber_val == ival[n]
- && REGNO (clobber_reg) == REGNO (xdest[n]))
- {
- continue;
- }
-
- /* LD_REGS can use LDI to move a constant value */
-
- if (ldreg_p)
- {
- xop[0] = xdest[n];
- xop[1] = xval;
- avr_asm_len ("ldi %0,lo8(%1)", xop, len, 1);
- continue;
- }
-
- /* Try to reuse value already loaded in some lower byte. */
-
- for (int j = 0; j < n; j++)
- if (ival[j] == ival[n])
- {
- xop[0] = xdest[n];
- xop[1] = xdest[j];
-
- avr_asm_len ("mov %0,%1", xop, len, 1);
- done_byte = true;
- break;
- }
-
- if (done_byte)
- continue;
-
- /* Need no clobber reg for -1: Use CLR/DEC */
-
- if (ival[n] == -1)
- {
- if (!clear_p)
- avr_asm_len ("clr %0", &xdest[n], len, 1);
-
- avr_asm_len ("dec %0", &xdest[n], len, 1);
- continue;
- }
- else if (ival[n] == 1)
- {
- if (!clear_p)
- avr_asm_len ("clr %0", &xdest[n], len, 1);
-
- avr_asm_len ("inc %0", &xdest[n], len, 1);
- continue;
- }
-
- /* Use T flag or INC to manage powers of 2 if we have
- no clobber reg. */
-
- if (NULL_RTX == clobber_reg
- && single_one_operand (xval, QImode))
- {
- xop[0] = xdest[n];
- xop[1] = GEN_INT (exact_log2 (ival[n] & GET_MODE_MASK (QImode)));
-
- gcc_assert (constm1_rtx != xop[1]);
-
- if (!set_p)
- {
- set_p = true;
- avr_asm_len ("set", xop, len, 1);
- }
-
- if (!clear_p)
- avr_asm_len ("clr %0", xop, len, 1);
-
- avr_asm_len ("bld %0,%1", xop, len, 1);
- continue;
- }
-
- /* We actually need the LD_REGS clobber reg. */
-
- gcc_assert (NULL_RTX != clobber_reg);
-
- xop[0] = xdest[n];
- xop[1] = xval;
- xop[2] = clobber_reg;
- clobber_val = ival[n];
-
- avr_asm_len ("ldi %2,lo8(%1)" CR_TAB
- "mov %0,%2", xop, len, 2);
- }
-
- /* If we cooked up a clobber reg above, restore it. */
-
- if (cooked_clobber_p)
- {
- avr_asm_len ("mov %0,__tmp_reg__", &clobber_reg, len, 1);
- }
-}
-
-
/* Reload the constant OP[1] into the HI register OP[0].
CLOBBER_REG is a QI clobber reg needed to move vast majority of consts
into a NO_LD_REGS register. If CLOBBER_REG is NULL_RTX we either don't
@@ -14825,10 +13605,14 @@ avr_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
const char *
avr_out_sbxx_branch (rtx_insn *insn, rtx operands[])
{
- enum rtx_code comp = GET_CODE (operands[0]);
+ rtx_code comp = GET_CODE (operands[0]);
bool long_jump = get_attr_length (insn) >= 4;
bool reverse = long_jump || jump_over_one_insn_p (insn, operands[3]);
+ // PR116953: jump_over_one_insn_p may call extract on the next insn,
+ // clobbering recog_data.operand. Thus, restore recog_data.
+ extract_constrain_insn_cached (insn);
+
if (comp == GE)
comp = EQ;
else if (comp == LT)
@@ -15723,7 +14507,7 @@ avr_has_nibble_0xf (rtx ival)
typedef struct
{
/* tree code of binary function G */
- enum tree_code code;
+ tree_code code;
/* The constant second argument of G */
int arg;
@@ -15986,7 +14770,7 @@ struct GTY(()) avr_builtin_description
that a built-in's ID can be used to access the built-in by means of
avr_bdesc[ID] */
-static GTY(()) struct avr_builtin_description
+static GTY(()) avr_builtin_description
avr_bdesc[AVR_BUILTIN_COUNT] =
{
#define DEF_BUILTIN(NAME, N_ARGS, TYPE, ICODE, LIBNAME) \
@@ -16282,7 +15066,7 @@ avr_expand_builtin (tree exp, rtx target, rtx /*subtarget*/,
tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
const char *bname = IDENTIFIER_POINTER (DECL_NAME (fndecl));
unsigned int id = DECL_MD_FUNCTION_CODE (fndecl);
- const struct avr_builtin_description *d = &avr_bdesc[id];
+ const avr_builtin_description *d = &avr_bdesc[id];
tree arg0;
rtx op0;
@@ -16678,10 +15462,25 @@ avr_md_asm_adjust (vec<rtx> &/*outputs*/, vec<rtx> &/*inputs*/,
}
+/* Implement `TARGET_C_MODE_FOR_FLOATING_TYPE'. Return SFmode or DFmode
+ for TI_{LONG_,}DOUBLE_TYPE which is for {long,} double type, go with
+ the default one for the others. */
+
+static machine_mode
+avr_c_mode_for_floating_type (tree_index ti)
+{
+ if (ti == TI_DOUBLE_TYPE)
+ return avr_double == 32 ? SFmode : DFmode;
+ if (ti == TI_LONG_DOUBLE_TYPE)
+ return avr_long_double == 32 ? SFmode : DFmode;
+ return default_mode_for_floating_type (ti);
+}
+
+
/* Worker function for `FLOAT_LIB_COMPARE_RETURNS_BOOL'. */
bool
-avr_float_lib_compare_returns_bool (machine_mode mode, enum rtx_code)
+avr_float_lib_compare_returns_bool (machine_mode mode, rtx_code)
{
if (mode == DFmode)
{
@@ -16910,7 +15709,7 @@ avr_use_lra_p ()
#undef TARGET_C_MODE_FOR_FLOATING_TYPE
#define TARGET_C_MODE_FOR_FLOATING_TYPE avr_c_mode_for_floating_type
-struct gcc_target targetm = TARGET_INITIALIZER;
+gcc_target targetm = TARGET_INITIALIZER;
#include "gt-avr.h"
diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
index 56b7f39..3ef1897 100644
--- a/gcc/config/avr/avr.h
+++ b/gcc/config/avr/avr.h
@@ -1,5 +1,5 @@
/* Definitions of target machine for GNU compiler,
- for ATMEL AVR at90s8515, ATmega103/103L, ATmega603/603L microcontrollers.
+ for AVR 8-bit microcontrollers.
Copyright (C) 1998-2024 Free Software Foundation, Inc.
Contributed by Denis Chertykov (chertykov@gmail.com)
@@ -308,18 +308,25 @@ enum reg_class {
#define STATIC_CHAIN_REGNUM ((AVR_TINY) ? 18 :2)
-#define ELIMINABLE_REGS { \
+#define RELOAD_ELIMINABLE_REGS { \
{ ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
{ ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \
{ FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
{ FRAME_POINTER_REGNUM + 1, STACK_POINTER_REGNUM + 1 } }
+#define ELIMINABLE_REGS \
+ { \
+ { ARG_POINTER_REGNUM, STACK_POINTER_REGNUM }, \
+ { ARG_POINTER_REGNUM, FRAME_POINTER_REGNUM }, \
+ { FRAME_POINTER_REGNUM, STACK_POINTER_REGNUM } \
+ }
+
#define INITIAL_ELIMINATION_OFFSET(FROM, TO, OFFSET) \
OFFSET = avr_initial_elimination_offset (FROM, TO)
#define RETURN_ADDR_RTX(count, tem) avr_return_addr_rtx (count, tem)
-/* Don't use Push rounding. expr.cc: emit_single_push_insn is broken
+/* Don't use Push rounding. expr.cc: emit_single_push_insn is broken
for POST_DEC targets (PR27386). */
/*#define PUSH_ROUNDING(NPUSHED) (NPUSHED)*/
@@ -478,7 +485,7 @@ typedef struct avr_args
/* Set MOVE_RATIO to 3 to allow memory moves upto 4 bytes to happen
by pieces when optimizing for speed, like it did when MOVE_MAX_PIECES
- was 4. When optimizing for size, allow memory moves upto 2 bytes.
+ was 4. When optimizing for size, allow memory moves upto 2 bytes.
Also see avr_use_by_pieces_infrastructure_p. */
#define MOVE_RATIO(speed) ((speed) ? 3 : 2)
@@ -561,19 +568,19 @@ struct GTY(()) machine_function
-1 when "signal" attribute(s) with arguments are present but none
without argument. */
int is_signal;
-
+
/* 'true' - if current function is a non-blocking interrupt service
routine as specified by the "isr_noblock" attribute. */
int is_noblock;
- /* 'true' - if current function is a 'task' function
+ /* 'true' - if current function is a 'task' function
as specified by the "OS_task" attribute. */
int is_OS_task;
- /* 'true' - if current function is a 'main' function
+ /* 'true' - if current function is a 'main' function
as specified by the "OS_main" attribute. */
int is_OS_main;
-
+
/* Current function stack size. */
int stack_usage;
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index c10709e..aae8a69 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -1,5 +1,5 @@
;; Machine description for GNU compiler,
-;; for ATMEL AVR micro controllers.
+;; for AVR 8-bit microcontrollers.
;; Copyright (C) 1998-2024 Free Software Foundation, Inc.
;; Contributed by Denis Chertykov (chertykov@gmail.com)
@@ -87,7 +87,6 @@
UNSPEC_FMUL
UNSPEC_FMULS
UNSPEC_FMULSU
- UNSPEC_COPYSIGN
UNSPEC_INSERT_BITS
UNSPEC_ROUND
])
@@ -171,7 +170,7 @@
ashlsi, ashrsi, lshrsi,
ashlpsi, ashrpsi, lshrpsi,
insert_bits, insv_notbit, insv,
- add_set_ZN, cmp_uext, cmp_sext,
+ add_set_ZN, add_set_N, cmp_uext, cmp_sext, cmp_lsr,
no"
(const_string "no"))
@@ -261,6 +260,7 @@
(define_mode_iterator QIDI [QI HI PSI SI DI])
(define_mode_iterator QIPSI [QI HI PSI])
(define_mode_iterator HISI [HI PSI SI])
+(define_mode_iterator HI_SI [HI SI])
;; Ordered integral and fixed-point modes of specific sizes.
(define_mode_iterator ALL1 [QI QQ UQQ])
@@ -277,6 +277,10 @@
(define_mode_iterator ALLs234 [HI SI PSI
HQ HA SQ SA])
+(define_mode_iterator ALLCC [CC CCN CCZN])
+
+(define_mode_attr CCname [(CC "") (CCN "_N") (CCZN "_ZN")])
+
;; All supported move-modes
(define_mode_iterator MOVMODE [QI QQ UQQ
HI HQ UHQ HA UHA
@@ -320,6 +324,9 @@
(define_code_iterator xior [xor ior])
(define_code_iterator eqne [eq ne])
(define_code_iterator gelt [ge lt])
+(define_code_iterator eqnegtle [eq ne gt le])
+(define_code_iterator cmp_signed [eq ne ge lt gt le])
+(define_code_iterator op8_ZN [plus minus and ior xor ashift ashiftrt lshiftrt])
(define_code_iterator ss_addsub [ss_plus ss_minus])
(define_code_iterator us_addsub [us_plus us_minus])
@@ -985,41 +992,10 @@
(clobber (reg:CC REG_CC))])])
-;; For LPM loads from AS1 we split
-;; R = *Z
-;; to
-;; R = *Z++
-;; Z = Z - sizeof (R)
-;;
-;; so that the second instruction can be optimized out.
-
-(define_split ; "split-lpmx"
- [(set (match_operand:HISI 0 "register_operand" "")
- (match_operand:HISI 1 "memory_operand" ""))]
- "reload_completed
- && AVR_HAVE_LPMX
- && avr_mem_flash_p (operands[1])
- && REG_P (XEXP (operands[1], 0))
- && !reg_overlap_mentioned_p (XEXP (operands[1], 0), operands[0])"
- [(set (match_dup 0)
- (match_dup 2))
- (set (match_dup 3)
- (plus:HI (match_dup 3)
- (match_dup 4)))]
- {
- rtx addr = XEXP (operands[1], 0);
-
- operands[2] = replace_equiv_address (operands[1],
- gen_rtx_POST_INC (Pmode, addr));
- operands[3] = addr;
- operands[4] = gen_int_mode (-<SIZE>, HImode);
- })
-
-
;; Legitimate address and stuff allows way more addressing modes than
;; Reduced Tiny actually supports. Split them now so that we get
;; closer to real instructions which may result in some optimization
-;; opportunities.
+;; opportunities. This applies also to fake X + offset addressing.
(define_split
[(parallel [(set (match_operand:MOVMODE 0 "nonimmediate_operand")
(match_operand:MOVMODE 1 "general_operand"))
@@ -1032,7 +1008,7 @@
&& (MEM_P (operands[0]) || MEM_P (operands[1]))"
[(scratch)]
{
- if (avr_split_tiny_move (curr_insn, operands))
+ if (avr_split_fake_addressing_move (curr_insn, operands))
DONE;
FAIL;
})
@@ -6655,6 +6631,34 @@
(set_attr "adjust_len" "tstsi,*,compare,compare")])
+;; "*cmphi_lsr"
+;; "*cmpsi_lsr"
+;; "*cmppsi_lsr"
+(define_insn_and_split "*cmp<mode>_lsr"
+ [(set (reg:CC REG_CC)
+ (compare:CC (lshiftrt:HISI (match_operand:HISI 0 "register_operand" "r")
+ (match_operand:QI 1 "const_8_16_24_operand" "n"))
+ (const_int 0)))
+ (clobber (scratch:QI))]
+ "reload_completed"
+ {
+ return avr_out_cmp_lsr (insn, operands, NULL);
+ }
+ "&& 1"
+ [;; "cmpqi3"
+ (set (reg:CC REG_CC)
+ (compare:CC (match_dup 0)
+ (const_int 0)))]
+ {
+ // When only the most significant byte is relevant, use a plain cmpqi3.
+ if (INTVAL (operands[1]) / 8 == <SIZE> - 1)
+ operands[0] = simplify_gen_subreg (QImode, operands[0], <MODE>mode, <SIZE> - 1);
+ else
+ FAIL;
+ }
+ [(set_attr "adjust_len" "cmp_lsr")])
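+;; For example (illustrative): a cbranch on "(uint32_t) x >= 0x10000" only
+;; depends on the upper half of x; avr_maybe_cmp_lsr can rewrite it as an
+;; NE test of (lshiftrt:SI x 16) against 0, which this pattern then outputs.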
+
+
;; A helper for avr_pass_ifelse::avr_rest_of_handle_ifelse().
(define_expand "gen_compare<mode>"
[(parallel [(set (reg:CC REG_CC)
@@ -6684,7 +6688,7 @@
int icode = (int) GET_CODE (operands[0]);
targetm.canonicalize_comparison (&icode, &operands[1], &operands[2], false);
- PUT_CODE (operands[0], (enum rtx_code) icode);
+ PUT_CODE (operands[0], (rtx_code) icode);
})
(define_expand "cbranch<mode>4"
@@ -6701,7 +6705,7 @@
int icode = (int) GET_CODE (operands[0]);
targetm.canonicalize_comparison (&icode, &operands[1], &operands[2], false);
- PUT_CODE (operands[0], (enum rtx_code) icode);
+ PUT_CODE (operands[0], (rtx_code) icode);
})
@@ -6748,20 +6752,9 @@
(label_ref (match_dup 3))
(pc)))]
{
- // Unsigned >= 65536 and < 65536 can be performed by testing the
- // high word against 0.
- if ((GET_CODE (operands[0]) == LTU
- || GET_CODE (operands[0]) == GEU)
- && const_operand (operands[2], <MODE>mode)
- && INTVAL (avr_to_int_mode (operands[2])) == 65536)
- {
- // "cmphi3" of the high word against 0.
- operands[0] = copy_rtx (operands[0]);
- PUT_CODE (operands[0], GET_CODE (operands[0]) == GEU ? NE : EQ);
- operands[1] = simplify_gen_subreg (HImode, operands[1], <MODE>mode, 2);
- operands[2] = const0_rtx;
- operands[4] = gen_rtx_SCRATCH (QImode);
- }
+ // Unsigned >= 256^n and < 256^n can be performed by testing the
+ // higher bytes against 0 (*cmpsi_lsr).
+ avr_maybe_cmp_lsr (operands);
})
;; "cbranchpsi4_insn"
@@ -6784,7 +6777,12 @@
(if_then_else (match_op_dup 0
[(reg:CC REG_CC) (const_int 0)])
(label_ref (match_dup 3))
- (pc)))])
+ (pc)))]
+ {
+ // Unsigned >= 256^n and < 256^n can be performed by testing the
+ // higher bytes against 0 (*cmppsi_lsr).
+ avr_maybe_cmp_lsr (operands);
+ })
;; "cbranchhi4_insn"
;; "cbranchhq4_insn" "cbranchuhq4_insn" "cbranchha4_insn" "cbranchuha4_insn"
@@ -6810,21 +6808,11 @@
(pc)))]
{
// Unsigned >= 256 and < 256 can be performed by testing the
- // high byte against 0.
- if ((GET_CODE (operands[0]) == LTU
- || GET_CODE (operands[0]) == GEU)
- && const_operand (operands[2], <MODE>mode)
- && INTVAL (avr_to_int_mode (operands[2])) == 256)
- {
- rtx_code code = GET_CODE (operands[0]) == GEU ? NE : EQ;
- rtx hi8 = simplify_gen_subreg (QImode, operands[1], <MODE>mode, 1);
- rtx cmp = gen_rtx_fmt_ee (code, VOIDmode, cc_reg_rtx, const0_rtx);
- emit (gen_cmpqi3 (hi8, const0_rtx));
- emit (gen_branch (operands[3], cmp));
- DONE;
- }
+ // high byte against 0 (*cmphi_lsr).
+ avr_maybe_cmp_lsr (operands);
})
+
;; Combiner pattern to compare sign- or zero-extended register against
;; a wider register, like comparing uint8_t against uint16_t.
(define_insn_and_split "*cbranch<HISI:mode>.<code><QIPSI:mode>.0"
@@ -6889,6 +6877,469 @@
})
+;; Try to optimize decrement-and-branch. When we have an addition followed
+;; by a comparison of the result against zero, we can output the addition
+;; in such a way that SREG.N and SREG.Z are set according to the result.
+;; The comparisons are split off their cbranch insns by split2, before
+;; peephole2 patterns like the ones for swapped_tst and sbrx_branch apply.
+
+;; We do NOT use cmpelim / SELECT_CC_MODE because it has many shortcomings
+;; and is by no means equipollent to the removed cc0 framework -- at least
+;; with regard to the avr backend: Whether or not the result of a comparison
+;; can be obtained as a byproduct of an operation might depend on the
+;; availability of a scratch register: There are cases where we need a
+;; scratch register to optimize away a comparison, and where the operation
+;; without a comparison does not require a scratch. With the peep2 approach
+;; below, we can get a scratch from the peep2 framework without increasing
+;; the register pressure, whereas cmpelim doesn't offer such a feature.
+;; When no scratch is available, we just don't perform the optimization,
+;; i.e. the comparison against 0 won't be optimized away, which is preferred
+;; over increasing the register pressure -- in many cases without reason --
+;; which might result in additional spills.
+;; What we definitely do not want is to pop a scratch without need, and
+;; in some arithmetic insn we won't know whether it might also be considered
+;; for CCmode generation, at least not prior to register allocation:
+;; CCmode only comes into existence after register allocation.
+;; cmpelim has more shortcomings, for example some comparisons may not
+;; be available, and it does not handle several of the forms supported below,
+;; just to mention two. A solution for the former would be to return VOIDmode
+;; in SELECT_CC_MODE, but cmpelim doesn't handle that. Anyway, it's pointless
+;; to speculate about how other shortcomings could be fixed when the scratch
+;; problem is unsolved in cmpelim.
+;; Apart from that, compare-elim.cc lists some demands that are not
+;; compatible with this backend. For example, it assumes that when an insn
+;; can set the condition code, it is always of the form compare:CCM, i.e.
+;; all comparisons are supported. This is not the case for AVR, see the
+;; peep2 conditions below. There is no way (at least not a documented one)
+;; to express that in SELECT_CC_MODE.
+;; Apart from that, passes running before register allocation (and thus
+;; before split2) have #ifdef SELECT_CC_MODE, and there is no explanation
+;; anywhere of how to handle that.
+;; Skipping cmpelim is accomplished by not defining TARGET_FLAGS_REGNUM.
+
+;; Note: reload1.cc::do_output_reload() does not support output reloads
+;; for JUMP_INSNs, hence letting combine do decrement-and-branch might
+;; run into an ICE. Doing reloads by hand is too painful, hence stick with
+;; RTL peepholes for now.
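+
+;; As an illustration (not a literal dump): for a 16-bit counter N in R25:R24,
+;; a loop tail like "n -= 4; if (n != 0) goto loop;" can be output as
+;;     sbiw r24,4
+;;     brne .Loop
+;; because SBIW already sets SREG.Z and SREG.N, so the separate compare of
+;; N against 0 is peepholed away.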
+
+(define_expand "gen_add_for_<code>_<mode>"
+ [;; "*add.for.cczn.<mode>"
+ (parallel [(set (reg:CCZN REG_CC)
+ (compare:CCZN (plus:HISI (match_operand:HISI 0 "register_operand")
+ (match_operand:HISI 1 "const_int_operand"))
+ (const_int 0)))
+ (set (match_dup 0)
+ (plus:HISI (match_dup 0)
+ (match_dup 1)))
+ (clobber (match_operand:QI 3))])
+ ;; "branch_ZN"
+ (set (pc)
+ (if_then_else (eqnegtle (reg:CCZN REG_CC)
+ (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+(define_expand "gen_add_for_<code>_<mode>"
+ [;; "*add.for.ccn.<mode>"
+ (parallel [(set (reg:CCN REG_CC)
+ (compare:CCN (plus:HISI (match_operand:HISI 0 "register_operand")
+ (match_operand:HISI 1 "nonmemory_operand"))
+ (const_int 0)))
+ (set (match_dup 0)
+ (plus:HISI (match_dup 0)
+ (match_dup 1)))
+ (clobber (match_operand:QI 3))])
+ ;; "branch_N"
+ (set (pc)
+ (if_then_else (gelt (reg:CCN REG_CC)
+ (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+
+;; 1/3: Additions without a scratch register.
+(define_peephole2
+ [(parallel [(set (match_operand:HISI 0 "register_operand")
+ (plus:HISI (match_dup 0)
+ (match_operand:HISI 1 "nonmemory_operand")))
+ (clobber (reg:CC REG_CC))])
+ (parallel [(set (reg:CC REG_CC)
+ (compare:CC (match_dup 0)
+ (match_operand:HISI 3 "const0_operand")))
+ (clobber (scratch:QI))])
+ (set (pc)
+ (if_then_else (cmp_signed (reg:CC REG_CC)
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))]
+ "// Multi-byte reg-reg additions only set the N flag.
+ (<CODE> == GE || <CODE> == LT || ! REG_P (operands[1]))
+ // Needs a const or a d-reg.
+ && (REG_P (operands[1]) || d_register_operand (operands[0], <MODE>mode))
+ && peep2_regno_dead_p (3, REG_CC)"
+ [(scratch)]
+ {
+ emit (gen_gen_add_for_<code>_<mode> (operands[0], operands[1], operands[2],
+ gen_rtx_SCRATCH (QImode)));
+ DONE;
+ })
+
+;; 2/3: Additions with a scratch register from the insn.
+(define_peephole2
+ [(parallel [(set (match_operand:HISI 0 "register_operand")
+ (plus:HISI (match_dup 0)
+ (match_operand:HISI 1 "nonmemory_operand")))
+ (clobber (match_operand:QI 3 "scratch_or_d_register_operand"))
+ (clobber (reg:CC REG_CC))])
+ (parallel [(set (reg:CC REG_CC)
+ (compare:CC (match_dup 0)
+ (match_operand:HISI 4 "const0_operand")))
+ (clobber (scratch:QI))])
+ (set (pc)
+ (if_then_else (cmp_signed (reg:CC REG_CC)
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))]
+ "// Multi-byte reg-reg additions only set the N flag.
+ (<CODE> == GE || <CODE> == LT || ! REG_P (operands[1]))
+ && peep2_regno_dead_p (3, REG_CC)"
+ [(scratch)]
+ {
+ rtx scratch = operands[3];
+
+ // We need either a d-register or a scratch register
+ // when $1 is not a register.
+ if (! REG_P (operands[1])
+ && ! REG_P (scratch)
+ && ! d_register_operand (operands[0], <MODE>mode))
+ FAIL;
+
+ emit (gen_gen_add_for_<code>_<mode> (operands[0], operands[1], operands[2],
+ scratch));
+ DONE;
+ })
+
+;; 3/3: Additions with a scratch register from peephole2.
+(define_peephole2
+ [(match_scratch:QI 3 "d")
+ (parallel [(set (match_operand:HISI 0 "register_operand")
+ (plus:HISI (match_dup 0)
+ (match_operand:HISI 1 "const_int_operand")))
+ (clobber (reg:CC REG_CC))])
+ (parallel [(set (reg:CC REG_CC)
+ (compare:CC (match_dup 0)
+ (match_operand:HISI 4 "const0_operand")))
+ (clobber (scratch:QI))])
+ (set (pc)
+ (if_then_else (cmp_signed (reg:CC REG_CC)
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))]
+ "peep2_regno_dead_p (3, REG_CC)"
+ [(scratch)]
+ {
+ emit (gen_gen_add_for_<code>_<mode> (operands[0], operands[1], operands[2],
+ operands[3]));
+ DONE;
+ })
+
+;; Result of the above three peepholes is an addition that also
+;; performs a signed comparison (of the result) against zero.
+;; FIXME: Using (match_dup 0) instead of operands[3/4] makes rnregs
+;; barf in regrename.cc::merge_overlapping_regs(). For now, use the
+;; fix from PR50788: Constrain as "0".
+
+;; "*add.for.cczn.hi" "*add.for.cczn.psi" "*add.for.cczn.si"
+(define_insn "*add.for.cczn.<mode>"
+ [(set (reg:CCZN REG_CC)
+ (compare:CCZN
+ (plus:HISI (match_operand:HISI 3 "register_operand" "0 ,0")
+ (match_operand:HISI 1 "const_int_operand" "n ,n"))
+ (const_int 0)))
+ (set (match_operand:HISI 0 "register_operand" "=d ,r")
+ (plus:HISI (match_operand:HISI 4 "register_operand" "0 ,0")
+ (match_operand:HISI 5 "const_int_operand" "1 ,1")))
+ (clobber (match_scratch:QI 2 "=X ,&d"))]
+ "reload_completed"
+ {
+ return avr_out_plus_set_ZN (operands, nullptr);
+ }
+ [(set (attr "length")
+ (symbol_ref "<SIZE> * (1 + REG_P (operands[2]))"))
+ (set_attr "adjust_len" "add_set_ZN")])
+
+;; "*add.for.ccn.hi" "*add.for.ccn.psi" "*add.for.ccn.si"
+(define_insn "*add.for.ccn.<mode>"
+ [(set (reg:CCN REG_CC)
+ (compare:CCN
+ (plus:HISI (match_operand:HISI 3 "register_operand" "0 ,0 ,0")
+ (match_operand:HISI 1 "nonmemory_operand" "n ,n ,r"))
+ (const_int 0)))
+ (set (match_operand:HISI 0 "register_operand" "=d ,r ,r")
+ (plus:HISI (match_operand:HISI 4 "register_operand" "0 ,0 ,0")
+ (match_operand:HISI 5 "nonmemory_operand" "1 ,1 ,1")))
+ (clobber (match_scratch:QI 2 "=X ,&d,X"))]
+ "reload_completed"
+ {
+ return avr_out_plus_set_N (operands, nullptr);
+ }
+ [(set (attr "length")
+ (symbol_ref "<SIZE> * (1 + REG_P (operands[2]))"))
+ (set_attr "adjust_len" "add_set_N")])
+
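+;; Illustrative sketch (an assumption for exposition, not taken from this
+;; patch): for a decrement like
+;;
+;;     i -= 1;              /* i of a 16-bit, 24-bit or 32-bit type */
+;;     if (i >= 0)
+;;       ...
+;;
+;; the compiler used to emit the addition, a separate comparison of the
+;; result against zero, and the conditional branch.  The peepholes above
+;; rewrite that sequence so that avr_out_plus_set_ZN / avr_out_plus_set_N
+;; emit the addition with SREG.N (and SREG.Z) already describing the
+;; result, and the explicit comparison is dropped.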
+
+;; 1/3: Subtractions with REG subtrahend set Z and N in a meaningful way.
+;; The QI and PSI cases are handled below because they don't have a scratch:QI.
+(define_peephole2
+ [(parallel [(set (match_operand:HI_SI 0 "register_operand")
+ (minus:HI_SI (match_dup 0)
+ (match_operand:HI_SI 1 "register_operand")))
+ (clobber (scratch:QI))
+ (clobber (reg:CC REG_CC))])
+ (parallel [(set (reg:CC REG_CC)
+ (compare:CC (match_dup 0)
+ (match_operand:HI_SI 3 "const0_operand")))
+ (clobber (scratch:QI))])
+ (set (pc)
+ (if_then_else (cmp_signed (reg:CC REG_CC)
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))]
+ "peep2_regno_dead_p (3, REG_CC)"
+ [;; "*sub.for.cczn.<mode>"
+ (parallel [(set (reg:CCZN REG_CC)
+ (compare:CCZN (minus:HI_SI (match_dup 0)
+ (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0)
+ (minus:HI_SI (match_dup 0)
+ (match_dup 1)))])
+ ;; "branch_ZN"
+ (set (pc)
+ (if_then_else (cmp_signed (reg:CCZN REG_CC)
+ (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+;; 2/3: Subtractions with a PSImode REG: no scratch:QI.
+(define_peephole2
+ [(parallel [(set (match_operand:PSI 0 "register_operand")
+ (minus:PSI (match_dup 0)
+ (match_operand:PSI 1 "register_operand")))
+ (clobber (reg:CC REG_CC))])
+ (parallel [(set (reg:CC REG_CC)
+ (compare:CC (match_dup 0)
+ (match_operand:PSI 3 "const0_operand")))
+ (clobber (scratch:QI))])
+ (set (pc)
+ (if_then_else (cmp_signed (reg:CC REG_CC)
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))]
+ "peep2_regno_dead_p (3, REG_CC)"
+ [;; "*sub.for.cczn.psi"
+ (parallel [(set (reg:CCZN REG_CC)
+ (compare:CCZN (minus:PSI (match_dup 0)
+ (match_dup 1))
+ (const_int 0)))
+ (set (match_dup 0)
+ (minus:PSI (match_dup 0)
+ (match_dup 1)))])
+ ;; "branch_ZN"
+ (set (pc)
+ (if_then_else (cmp_signed (reg:CCZN REG_CC)
+ (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+;; 3/3: Subtractions that extend the subtrahend.
+(define_peephole2
+ [(parallel [(set (match_operand:HISI 0 "register_operand")
+ (minus:HISI (match_dup 0)
+ (any_extend:HISI (match_operand:QIPSI 1 "register_operand"))))
+ (clobber (reg:CC REG_CC))])
+ (parallel [(set (reg:CC REG_CC)
+ (compare:CC (match_dup 0)
+ (match_operand:HISI 3 "const0_operand")))
+ (clobber (scratch:QI))])
+ (set (pc)
+ (if_then_else (cmp_signed (reg:CC REG_CC)
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))]
+ "<HISI:SIZE> > <QIPSI:SIZE>
+ && peep2_regno_dead_p (3, REG_CC)"
+ [;; "*sub-extend<QIPSI:mode>.for.cczn.<HISI:mode>"
+ (parallel [(set (reg:CCZN REG_CC)
+ (compare:CCZN (minus:HISI (match_dup 0)
+ (any_extend:HISI (match_dup 1)))
+ (const_int 0)))
+ (set (match_dup 0)
+ (minus:HISI (match_dup 0)
+ (any_extend:HISI (match_dup 1))))])
+ ;; "branch_ZN"
+ (set (pc)
+ (if_then_else (cmp_signed (reg:CCZN REG_CC)
+ (const_int 0))
+ (label_ref (match_dup 2))
+ (pc)))])
+
+;; "*sub.for.cczn.hi"
+;; "*sub.for.cczn.psi"
+;; "*sub.for.cczn.si"
+(define_insn "*sub.for.cczn.<mode>"
+ [(set (reg:CCZN REG_CC)
+ (compare:CCZN (minus:HISI (match_operand:HISI 3 "register_operand" "1")
+ (match_operand:HISI 4 "register_operand" "2"))
+ (const_int 0)))
+ (set (match_operand:HISI 0 "register_operand" "=r")
+ (minus:HISI (match_operand:HISI 1 "register_operand" "0")
+ (match_operand:HISI 2 "register_operand" "r")))]
+ "reload_completed"
+ {
+ return avr_out_plus_ext (insn, operands, nullptr);
+ }
+ [(set_attr "length" "<SIZE>")])
+
+
+(define_insn "*sub-extend<QIPSI:mode>.for.cczn.<HISI:mode>"
+ [(set (reg:CCZN REG_CC)
+ (compare:CCZN (minus:HISI (match_operand:HISI 3 "register_operand" "0")
+ (any_extend:HISI
+ (match_operand:QIPSI 4 "register_operand" "2")))
+ (const_int 0)))
+ (set (match_operand:HISI 0 "register_operand" "=r")
+ (minus:HISI (match_operand:HISI 1 "register_operand" "0")
+ (any_extend:HISI (match_operand:QIPSI 2 "register_operand" "r"))))]
+ "reload_completed
+ && <HISI:SIZE> > <QIPSI:SIZE>"
+ {
+ return avr_out_plus_ext (insn, operands, nullptr);
+ }
+ [(set (attr "length")
+ (symbol_ref "<HISI:SIZE> + 3 * (<CODE> == SIGN_EXTEND)"))])
+
+
+;; Operations other than PLUS can set the condition code in
+;; a meaningful way, too.
+
+;; 1/1 Left shift sets the N bit.
+(define_peephole2
+ [(parallel [(set (match_operand:HISI 0 "register_operand")
+ (ashift:HISI (match_dup 0)
+ (const_int 1)))
+ (clobber (match_operand:QI 3 "scratch_operand"))
+ (clobber (reg:CC REG_CC))])
+ (parallel [(set (reg:CC REG_CC)
+ (compare:CC (match_dup 0)
+ (const_int 0)))
+ (clobber (scratch:QI))])
+ (set (pc)
+ (if_then_else (gelt (reg:CC REG_CC)
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))]
+ "peep2_regno_dead_p (3, REG_CC)"
+ [;; "*ashift.for.ccn.<mode>"
+ (parallel [(set (reg:CCN REG_CC)
+ (compare:CCN (ashift:HISI (match_dup 0)
+ (const_int 1))
+ (const_int 0)))
+ (set (match_dup 0)
+ (ashift:HISI (match_dup 0)
+ (const_int 1)))])
+ ;; "branch_N"
+ (set (pc)
+ (if_then_else (gelt (reg:CCN REG_CC)
+ (const_int 0))
+ (label_ref (match_operand 2))
+ (pc)))])
+
+(define_insn "*ashift.for.ccn.<mode>"
+ [(set (reg:CCN REG_CC)
+ (compare:CCN (ashift:HISI (match_operand:HISI 2 "register_operand" "0")
+ (const_int 1))
+ (const_int 0)))
+ (set (match_operand:HISI 0 "register_operand" "=r")
+ (ashift:HISI (match_operand:HISI 1 "register_operand" "0")
+ (const_int 1)))]
+ "reload_completed"
+ {
+ output_asm_insn ("lsl %A0", operands);
+ output_asm_insn ("rol %B0", operands);
+ if (<SIZE> >= 3) output_asm_insn ("rol %C0", operands);
+ if (<SIZE> >= 4) output_asm_insn ("rol %D0", operands);
+ return "";
+ }
+ [(set_attr "length" "<SIZE>")])
+
+
+;; 1/1 QImode operations that set Z and N in a meaningful way.
+(define_peephole2
+ [(parallel [(set (match_operand:QI 0 "register_operand")
+ (match_operator:QI 2 "op8_ZN_operator" [(match_dup 0)
+ (match_operand:QI 1)]))
+ (clobber (reg:CC REG_CC))])
+ (set (reg:CC REG_CC)
+ (compare:CC (match_dup 0)
+ (match_operand:QI 4 "const0_operand")))
+ (set (pc)
+ (if_then_else (cmp_signed (reg:CC REG_CC)
+ (const_int 0))
+ (label_ref (match_operand 3))
+ (pc)))]
+ "peep2_regno_dead_p (3, REG_CC)"
+ [;; "*op8.for.cczn.<code>"
+ (parallel [(set (reg:CCZN REG_CC)
+ (compare:CCZN (match_op_dup 2 [(match_dup 0)
+ (match_dup 1)])
+ (const_int 0)))
+ (set (match_dup 0)
+ (match_op_dup 2 [(match_dup 0)
+ (match_dup 1)]))])
+ ;; "branch_ZN"
+ (set (pc)
+ (if_then_else (cmp_signed (reg:CCZN REG_CC)
+ (const_int 0))
+ (label_ref (match_operand 3))
+ (pc)))])
+
+;; Constraints and predicate for the insn below. This is what op8_ZN_operator
+;; allows. Constraints are written in such a way that all cases have two
+;; alternatives (shifts, XOR and MINUS have effectively just one alternative).
+;; Note again that due to nregs, match_dup's won't work.
+(define_code_attr c0_op8
+ [(xor "r,r") (minus "r,r") (ashift "r,r") (ashiftrt "r,r") (lshiftrt "r,r")
+ (and "d,r") (ior "d,r") (plus "d,r")])
+
+(define_code_attr c2_op8
+ [(xor "r,r") (minus "r,r") (and "n,r") (ior "n,r") (plus "n,r P N K Cm2")
+ (ashift "P K,C03") (ashiftrt "P K,C03") (lshiftrt "P K,C03")])
+
+(define_code_attr p2_op8
+ [(ashift "const_1_to_3") (ashiftrt "const_1_to_3") (lshiftrt "const_1_to_3")
+ (xor "register") (minus "register")
+ (plus "nonmemory") (and "nonmemory") (ior "nonmemory")])
+
+;; Result of the peephole2 above: An 8-bit operation that sets Z and N.
+;; The allowed operations are: PLUS, MINUS, AND, IOR, XOR and SHIFTs
+;; with operands according to op8_ZN_operator.
+(define_insn "*op8.for.cczn.<code>"
+ [(set (reg:CCZN REG_CC)
+ (compare:CCZN (op8_ZN:QI (match_operand:QI 3 "register_operand" "0,0")
+ (match_operand:QI 4 "<p2_op8>_operand" "2,2"))
+ (const_int 0)))
+ (set (match_operand:QI 0 "register_operand" "=<c0_op8>")
+ (op8_ZN:QI (match_operand:QI 1 "register_operand" "0,0")
+ (match_operand:QI 2 "<p2_op8>_operand" "<c2_op8>")))]
+ "reload_completed"
+ {
+ return avr_out_op8_set_ZN (<CODE>, operands, nullptr);
+ }
+ [(set (attr "length")
+ (symbol_ref "avr_len_op8_set_ZN (<CODE>, operands)"))])
+
+
;; Test a single bit in a QI/HI/SImode register.
;; Combine will create zero-extract patterns for single-bit tests.
;; Permit any mode in source pattern by using VOIDmode.
@@ -7050,32 +7501,25 @@
;; Compare with 0 (test) jumps
;; ************************************************************************
-(define_insn "branch"
+;; "branch"
+;; "branch_N"
+;; "branch_ZN"
+(define_insn "branch<CCname>"
[(set (pc)
- (if_then_else (match_operator 1 "simple_comparison_operator"
- [(reg:CC REG_CC)
+ (if_then_else (match_operator 1 "ordered_comparison_operator"
+ [(reg:ALLCC REG_CC)
(const_int 0)])
(label_ref (match_operand 0))
(pc)))]
"reload_completed"
{
- return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 0);
- }
- [(set_attr "type" "branch")])
-
-
-(define_insn "difficult_branch"
- [(set (pc)
- (if_then_else (match_operator 1 "difficult_comparison_operator"
- [(reg:CC REG_CC)
- (const_int 0)])
- (label_ref (match_operand 0 "" ""))
- (pc)))]
- "reload_completed"
- {
- return ret_cond_branch (operands[1], avr_jump_mode (operands[0], insn), 0);
+ return avr_cond_branch (insn, operands);
}
- [(set_attr "type" "branch1")])
+ [(set (attr "type")
+ (if_then_else
+ (match_test "simple_comparison_operator (operands[1], VOIDmode)")
+ (const_string "branch")
+ (const_string "branch1")))])
;; **************************************************************************
@@ -8839,12 +9283,18 @@
;; Copysign
(define_insn "copysignsf3"
- [(set (match_operand:SF 0 "register_operand" "=r")
- (unspec:SF [(match_operand:SF 1 "register_operand" "0")
- (match_operand:SF 2 "register_operand" "r")]
- UNSPEC_COPYSIGN))]
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (copysign:SF (match_operand:SF 1 "register_operand" "0")
+ (match_operand:SF 2 "nonmemory_operand" "rF")))]
""
- "bst %D2,7\;bld %D0,7"
+ {
+ if (const_double_operand (operands[2], SFmode))
+ {
+ rtx xmsb = simplify_gen_subreg (QImode, operands[2], SFmode, 3);
+ return INTVAL (xmsb) < 0 ? "set\;bld %D0,7" : "clt\;bld %D0,7";
+ }
+ return "bst %D2,7\;bld %D0,7";
+ }
[(set_attr "length" "2")])
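
;; Illustrative note (not taken from this patch): with a constant sign
;; source now allowed, copysignf (x, -1.0f) can be emitted as
;; "set\;bld %D0,7" and copysignf (x, 1.0f) as "clt\;bld %D0,7", instead
;; of first loading the constant into a register for "bst %D2,7\;bld %D0,7".
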
;; Swap Bytes (change byte-endianness)
@@ -9555,173 +10005,6 @@
(clobber (reg:CC REG_CC))])])
-;; Try optimize decrement-and-branch. When we have an addition followed
-;; by a comparison of the result against zero, we can output the addition
-;; in such a way that SREG.N and SREG.Z are set according to the result.
-
-;; { -1, +1 } for QImode, otherwise the empty set.
-(define_mode_attr p1m1 [(QI "N P")
- (HI "Yxx") (PSI "Yxx") (SI "Yxx")])
-
-;; FIXME: reload1.cc::do_output_reload() does not support output reloads
-;; for JUMP_INSNs, hence letting combine doing decrement-and-branch like
-;; the following might run into ICE. Doing reloads by hand is too painful...
-;
-; (define_insn_and_split "*add.for.eqne.<mode>.cbranch"
-; [(set (pc)
-; (if_then_else (eqne (match_operand:QISI 1 "register_operand" "0")
-; (match_operand:QISI 2 "const_int_operand" "n"))
-; (label_ref (match_operand 4))
-; (pc)))
-; (set (match_operand:QISI 0 "register_operand" "=r")
-; (plus:QISI (match_dup 1)
-; (match_operand:QISI 3 "const_int_operand" "n")))]
-; ;; No clobber for now as combine might not have one handy.
-; ;; We pop a scatch in split1.
-; "!reload_completed
-; && const0_rtx == simplify_binary_operation (PLUS, <MODE>mode,
-; operands[2], operands[3])"
-; { gcc_unreachable(); }
-; "&& 1"
-; [(parallel [(set (pc)
-; (if_then_else (eqne (match_dup 1)
-; (match_dup 2))
-; (label_ref (match_dup 4))
-; (pc)))
-; (set (match_dup 0)
-; (plus:QISI (match_dup 1)
-; (match_dup 3)))
-; (clobber (scratch:QI))])])
-;
-;; ...Hence, stick with RTL peepholes for now. Unfortunately, there is no
-;; canonical form, and if reload shuffles registers around, we might miss
-;; opportunities to match a decrement-and-branch.
-;; doloop_end doesn't reload either, so doloop_end also won't work.
-
-(define_expand "gen_add_for_<code>_<mode>"
- ; "*add.for.eqne.<mode>"
- [(parallel [(set (reg:CC REG_CC)
- (compare:CC (plus:QISI (match_operand:QISI 0 "register_operand")
- (match_operand:QISI 1 "const_int_operand"))
- (const_int 0)))
- (set (match_dup 0)
- (plus:QISI (match_dup 0)
- (match_dup 1)))
- (clobber (match_operand:QI 3))])
- ; "branch"
- (set (pc)
- (if_then_else (eqne (reg:CC REG_CC)
- (const_int 0))
- (label_ref (match_dup 2))
- (pc)))])
-
-
-;; 1/3: A version without clobber: d-reg or 8-bit adds +/-1.
-(define_peephole2
- [(parallel [(set (match_operand:QISI 0 "register_operand")
- (plus:QISI (match_dup 0)
- (match_operand:QISI 1 "const_int_operand")))
- (clobber (reg:CC REG_CC))])
- (set (reg:CC REG_CC)
- (compare:CC (match_dup 0)
- (const_int 0)))
- (set (pc)
- (if_then_else (eqne (reg:CC REG_CC)
- (const_int 0))
- (label_ref (match_operand 2))
- (pc)))]
- "peep2_regno_dead_p (3, REG_CC)
- && (d_register_operand (operands[0], <MODE>mode)
- || (<MODE>mode == QImode
- && (INTVAL (operands[1]) == 1
- || INTVAL (operands[1]) == -1)))"
- [(scratch)]
- {
- emit (gen_gen_add_for_<code>_<mode> (operands[0], operands[1], operands[2],
- gen_rtx_SCRATCH (QImode)));
- DONE;
- })
-
-;; 2/3: A version with clobber from the insn.
-(define_peephole2
- [(parallel [(set (match_operand:QISI 0 "register_operand")
- (plus:QISI (match_dup 0)
- (match_operand:QISI 1 "const_int_operand")))
- (clobber (match_operand:QI 3 "scratch_or_d_register_operand"))
- (clobber (reg:CC REG_CC))])
- (parallel [(set (reg:CC REG_CC)
- (compare:CC (match_dup 0)
- (const_int 0)))
- (clobber (match_operand:QI 4 "scratch_or_d_register_operand"))])
- (set (pc)
- (if_then_else (eqne (reg:CC REG_CC)
- (const_int 0))
- (label_ref (match_operand 2))
- (pc)))]
- "peep2_regno_dead_p (3, REG_CC)"
- [(scratch)]
- {
- rtx scratch = REG_P (operands[3]) ? operands[3] : operands[4];
-
- // We need either a d-register or a scratch register to clobber.
- if (! REG_P (scratch)
- && ! d_register_operand (operands[0], <MODE>mode)
- && ! (QImode == <MODE>mode
- && (INTVAL (operands[1]) == 1
- || INTVAL (operands[1]) == -1)))
- {
- FAIL;
- }
- emit (gen_gen_add_for_<code>_<mode> (operands[0], operands[1], operands[2],
- scratch));
- DONE;
- })
-
-;; 3/3 A version with a clobber from peephole2.
-(define_peephole2
- [(match_scratch:QI 3 "d")
- (parallel [(set (match_operand:QISI 0 "register_operand")
- (plus:QISI (match_dup 0)
- (match_operand:QISI 1 "const_int_operand")))
- (clobber (reg:CC REG_CC))])
- (set (reg:CC REG_CC)
- (compare:CC (match_dup 0)
- (const_int 0)))
- (set (pc)
- (if_then_else (eqne (reg:CC REG_CC)
- (const_int 0))
- (label_ref (match_operand 2))
- (pc)))]
- "peep2_regno_dead_p (3, REG_CC)"
- [(scratch)]
- {
- emit (gen_gen_add_for_<code>_<mode> (operands[0], operands[1], operands[2],
- operands[3]));
- DONE;
- })
-
-;; Result of the above three peepholes is an addition that also
-;; performs an EQ or NE comparison (of the result) against zero.
-;; FIXME: Using (match_dup 0) instead of operands[3/4] makes rnregs
-;; barf in regrename.cc::merge_overlapping_regs(). For now, use the
-;; fix from PR50788: Constrain as "0".
-(define_insn "*add.for.eqne.<mode>"
- [(set (reg:CC REG_CC)
- (compare:CC
- (plus:QISI (match_operand:QISI 3 "register_operand" "0,0 ,0")
- (match_operand:QISI 1 "const_int_operand" "n,<p1m1>,n"))
- (const_int 0)))
- (set (match_operand:QISI 0 "register_operand" "=d,*r ,r")
- (plus:QISI (match_operand:QISI 4 "register_operand" "0,0 ,0")
- (match_dup 1)))
- (clobber (match_scratch:QI 2 "=X,X ,&d"))]
- "reload_completed"
- {
- return avr_out_plus_set_ZN (operands, nullptr);
- }
- [(set_attr "adjust_len" "add_set_ZN")])
-
-
;; Swapping both comparison and branch condition. This can turn difficult
;; branches to easy ones. And in some cases, a comparison against one can
;; be turned into a comparison against zero.
@@ -9749,7 +10032,7 @@
(pc)))]
{
rtx xval = avr_to_int_mode (operands[2]);
- enum rtx_code code = GET_CODE (operands[0]);
+ rtx_code code = GET_CODE (operands[0]);
if (code == GT && xval == const0_rtx)
code = LT;
@@ -9789,7 +10072,7 @@
(pc)))]
{
rtx xval = avr_to_int_mode (operands[2]);
- enum rtx_code code = GET_CODE (operands[0]);
+ rtx_code code = GET_CODE (operands[0]);
if (code == GT && xval == const0_rtx)
code = LT;
diff --git a/gcc/config/avr/avr.opt b/gcc/config/avr/avr.opt
index 444ed7e..625323f 100644
--- a/gcc/config/avr/avr.opt
+++ b/gcc/config/avr/avr.opt
@@ -1,4 +1,4 @@
-; Options for the ATMEL AVR port of the compiler.
+; Options for AVR 8-bit microcontrollers.
; Copyright (C) 2005-2024 Free Software Foundation, Inc.
;
diff --git a/gcc/config/avr/avrlibc.h b/gcc/config/avr/avrlibc.h
index fb4ffed..60fce70 100644
--- a/gcc/config/avr/avrlibc.h
+++ b/gcc/config/avr/avrlibc.h
@@ -1,5 +1,4 @@
-/* Definitions of target machine for the GNU compiler collection
- for Atmel AVR micro controller if configured for AVR-Libc.
+/* Definitions for AVR 8-bit microcontrollers if configured for AVR-LibC.
Copyright (C) 2012-2024 Free Software Foundation, Inc.
Contributed by Georg-Johann Lay (avr@gjlay.de)
diff --git a/gcc/config/avr/constraints.md b/gcc/config/avr/constraints.md
index 963e23a..9512302 100644
--- a/gcc/config/avr/constraints.md
+++ b/gcc/config/avr/constraints.md
@@ -1,4 +1,4 @@
-;; Constraint definitions for ATMEL AVR micro controllers.
+;; Insn constraint definitions for AVR 8-bit microcontrollers.
;; Copyright (C) 2006-2024 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
diff --git a/gcc/config/avr/driver-avr.cc b/gcc/config/avr/driver-avr.cc
index 92e875a..3eefcab 100644
--- a/gcc/config/avr/driver-avr.cc
+++ b/gcc/config/avr/driver-avr.cc
@@ -1,4 +1,4 @@
-/* Subroutines for the gcc driver.
+/* Subroutines for the gcc driver for AVR 8-bit microcontrollers.
Copyright (C) 2009-2024 Free Software Foundation, Inc.
Contributed by Georg-Johann Lay <avr@gjlay.de>
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/avr/elf.h b/gcc/config/avr/elf.h
index 0112aa3..1e769f6 100644
--- a/gcc/config/avr/elf.h
+++ b/gcc/config/avr/elf.h
@@ -1,4 +1,5 @@
-/* Copyright (C) 2011-2024 Free Software Foundation, Inc.
+/* Overrides for elfos.h for AVR 8-bit microcontrollers.
+ Copyright (C) 2011-2024 Free Software Foundation, Inc.
Contributed by Georg-Johann Lay (avr@gjlay.de)
This file is part of GCC.
@@ -7,12 +8,12 @@
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
-
+
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
diff --git a/gcc/config/avr/gen-avr-mmcu-specs.cc b/gcc/config/avr/gen-avr-mmcu-specs.cc
index bb94bea..4bdc0c2 100644
--- a/gcc/config/avr/gen-avr-mmcu-specs.cc
+++ b/gcc/config/avr/gen-avr-mmcu-specs.cc
@@ -1,4 +1,5 @@
-/* Copyright (C) 1998-2024 Free Software Foundation, Inc.
+/* Build device-specs for AVR 8-bit microcontrollers.
+ Copyright (C) 1998-2024 Free Software Foundation, Inc.
Contributed by Joern Rennecke
This file is part of GCC.
@@ -7,12 +8,12 @@
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
-
+
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
diff --git a/gcc/config/avr/gen-avr-mmcu-texi.cc b/gcc/config/avr/gen-avr-mmcu-texi.cc
index 70aa430..df2620f 100644
--- a/gcc/config/avr/gen-avr-mmcu-texi.cc
+++ b/gcc/config/avr/gen-avr-mmcu-texi.cc
@@ -1,4 +1,5 @@
-/* Copyright (C) 2012-2024 Free Software Foundation, Inc.
+/* Build texi documentation for option -mmcu for AVR 8-bit microcontrollers.
+ Copyright (C) 2012-2024 Free Software Foundation, Inc.
Contributed by Georg-Johann Lay (avr@gjlay.de)
This file is part of GCC.
@@ -7,12 +8,12 @@
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 3, or (at your option)
any later version.
-
+
GCC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
diff --git a/gcc/config/avr/predicates.md b/gcc/config/avr/predicates.md
index 5b49481..c44ebff 100644
--- a/gcc/config/avr/predicates.md
+++ b/gcc/config/avr/predicates.md
@@ -1,4 +1,4 @@
-;; Predicate definitions for ATMEL AVR micro controllers.
+;; Insn predicate definitions for AVR 8-bit microcontrollers.
;; Copyright (C) 2006-2024 Free Software Foundation, Inc.
;;
;; This file is part of GCC.
@@ -147,6 +147,11 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 2, 7)")))
+;; Return true if OP is constant integer 1..3 for MODE.
+(define_predicate "const_1_to_3_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 1, 3)")))
+
;; Return 1 if OP is constant integer 1..6 for MODE.
(define_predicate "const_1_to_6_operand"
(and (match_code "const_int")
@@ -162,6 +167,12 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), -255, -1)")))
+;; Return true if OP is a CONST_INT in { -2, -1, 1, 2 }.
+(define_predicate "abs1_abs2_operand"
+ (and (match_code "const_int")
+ (match_test "INTVAL (op) != 0")
+ (match_test "IN_RANGE (INTVAL (op), -2, 2)")))
+
;; Returns true if OP is either the constant zero or a register.
(define_predicate "reg_or_0_operand"
(ior (match_operand 0 "register_operand")
@@ -242,10 +253,30 @@
(and (match_operand 0 "comparison_operator")
(not (match_code "gt,gtu,le,leu"))))
+;; True for EQ, NE, GE, LT, GT, LE
+(define_predicate "signed_comparison_operator"
+ (match_code "eq,ne,ge,lt,gt,le"))
+
;; True for SIGN_EXTEND, ZERO_EXTEND.
(define_predicate "extend_operator"
(match_code "sign_extend,zero_extend"))
+;; True for 8-bit operations that set SREG.N and SREG.Z in a
+;; usable way:
+;; * OP0 is a QImode register, and
+;; * OP1 is a QImode register or CONST_INT, and
+;;
+;; the allowed operation is one of:
+;;
+;; * SHIFTs with a const_int offset in { 1, 2, 3 }.
+;; * MINUS and XOR with a register operand
+;; * IOR and AND with a register operand, or d-reg + const_int
+;; * PLUS with a register operand, or d-reg + const_int,
+;;    or a const_int in { -2, -1, 1, 2 }.
+(define_predicate "op8_ZN_operator"
+ (and (match_code "plus,minus,ashift,ashiftrt,lshiftrt,and,ior,xor")
+ (match_test "avr_op8_ZN_operator (op)")))
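+
+;; Illustrative examples (assumptions for exposition, not taken from this
+;; patch; the precise test lives in avr_op8_ZN_operator):
+;;   (plus:QI (reg:QI 24) (const_int -1))   ;; ok: const_int in { -2, -1, 1, 2 }
+;;   (ashift:QI (reg:QI 24) (const_int 3))  ;; ok: shift offset in { 1, 2, 3 }
+;;   (ashift:QI (reg:QI 24) (const_int 5))  ;; not ok: offset not in { 1, 2, 3 }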
+
;; Return true if OP is a valid call operand.
(define_predicate "call_insn_operand"
(and (match_code "mem")
diff --git a/gcc/config/avr/ranges.h b/gcc/config/avr/ranges.h
new file mode 100644
index 0000000..89f6896
--- /dev/null
+++ b/gcc/config/avr/ranges.h
@@ -0,0 +1,278 @@
+/* Subsets of a finite interval over Z.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* A class that represents the union of finitely many intervals.
+ The domain over which the intervals are defined is a finite integer
+ interval [m_min, m_max], usually the range of some [u]intN_t.
+ Supported operations are:
+ - Complement w.r.t. the domain (invert)
+ - Union (union_)
+ - Intersection (intersect)
+ - Difference / Setminus (minus).
+ Ranges is closed under all operations: The result of all operations
+ is a Ranges over the same domain. (As opposed to value-range.h which
+ may ICE for some operations, see below).
+
+ The representation is unique in the sense that when we have two
+ Ranges A and B, then
+ 1) A == B <==> A.size == B.size && Ai == Bi for all i.
+
+ The representation is normalized:
+ 2) Ai != {} ;; There are no empty intervals.
+ 3) Ai.hi < A{i+1}.lo ;; The Ai's are in increasing order and separated
+ ;; by at least one value (non-adjacent).
+ The sub-intervals Ai are maintained as a std::vector.
+ The computation of union and intersection scales like A.size * B.size
+ i.e. Ranges is only eligible for GCC when size() has a fixed upper
+ bound independent of the program being compiled (or there are other
+   means to guarantee that the complexity is effectively linear).
+ In the context of AVR, we have size() <= 3.
+
+ The reason why we don't use value-range.h's irange or int_range is that
+ these use the integers Z as their domain, which makes computations like
+ invert() quite nasty as they may ICE for common cases. Doing all
+ these special cases (like one sub-interval touches the domain bounds)
+   makes using value-range.h more laborious (and unstable) than using our
+ own mini Ranger. */
+
+struct Ranges
+{
+ // This is good enough as it covers (un)signed SImode.
+ using T = HOST_WIDE_INT;
+ typedef T scalar_type;
+
+ // Non-empty ranges. Empty sets are only used transiently;
+ // Ranges.ranges[] doesn't use them.
+ struct SubRange
+ {
+ // Lower and upper bound, inclusively.
+ T lo, hi;
+
+ SubRange intersect (const SubRange &r) const
+ {
+ if (lo >= r.lo && hi <= r.hi)
+ return *this;
+ else if (r.lo >= lo && r.hi <= hi)
+ return r;
+ else if (lo > r.hi || hi < r.lo)
+ return SubRange { 1, 0 };
+ else
+ return SubRange { std::max (lo, r.lo), std::min (hi, r.hi) };
+ }
+
+ T cardinality () const
+ {
+ return std::max<T> (0, hi - lo + 1);
+ }
+ };
+
+ // Finitely many intervals over [m_min, m_max] that are normalized:
+ // No empty sets, increasing order, separated by at least one value.
+ T m_min, m_max;
+ std::vector<SubRange> ranges;
+
+ // Not used anywhere in Ranges; can be used elsewhere.
+ // May be clobbered by set operations.
+ int tag = -1;
+
+ enum initial_range { EMPTY, ALL };
+
+ Ranges (T mi, T ma, initial_range ir)
+ : m_min (mi), m_max (ma)
+ {
+ if (ir == ALL)
+ push (mi, ma);
+ }
+
+ // Domain is the range of some [u]intN_t.
+ static Ranges NBitsRanges (int n_bits, bool unsigned_p, initial_range ir)
+ {
+ T mask = ((T) 1 << n_bits) - 1;
+ gcc_assert (mask > 0);
+ T ma = mask >> ! unsigned_p;
+ return Ranges (unsigned_p ? 0 : -ma - 1, ma, ir);
+ }
+
+ static void sort2 (Ranges &a, Ranges &b)
+ {
+ if (a.size () && b.size ())
+ if (a.ranges[0].lo > b.ranges[0].lo)
+ std::swap (a, b);
+ }
+
+ void print (FILE *file) const
+ {
+ if (file)
+ {
+ fprintf (file, " .tag%d=#%d={", tag, size ());
+ for (const auto &r : ranges)
+ fprintf (file, "[ %ld, %ld ]", (long) r.lo, (long) r.hi);
+ fprintf (file, "}\n");
+ }
+ }
+
+ // The number of sub-intervals in .ranges.
+ int size () const
+ {
+ return (int) ranges.size ();
+ }
+
+ // Append [LO, HI] & [m_min, m_max] to .ranges provided the
+ // former is non-empty.
+ void push (T lo, T hi)
+ {
+ lo = std::max (lo, m_min);
+ hi = std::min (hi, m_max);
+
+ if (lo <= hi)
+ ranges.push_back (SubRange { lo, hi });
+ }
+
+ // Append R to .ranges provided the former is non-empty.
+ void push (const SubRange &r)
+ {
+ push (r.lo, r.hi);
+ }
+
+ // Cardinality of the n-th interval.
+ T cardinality (int n) const
+ {
+ return n < size () ? ranges[n].cardinality () : 0;
+ }
+
+ // Check that *this is normalized: .ranges are non-empty, non-overlapping,
+ // non-adjacent and increasing.
+ bool check () const
+ {
+ bool bad = size () && (ranges[0].lo < m_min
+ || ranges[size () - 1].hi > m_max);
+
+ for (int n = 0; n < size (); ++n)
+ {
+ bad |= ranges[n].lo > ranges[n].hi;
+ bad |= n > 0 && ranges[n - 1].hi >= ranges[n].lo;
+ }
+
+ if (bad)
+ print (dump_file);
+
+ return ! bad;
+ }
+
+ // Intersect A and B according to (U Ai) & (U Bj) = U (Ai & Bj)
+ // This has quadratic complexity, but also the nice property that
+ // when A and B are normalized, then the result is too.
+ void intersect (const Ranges &r)
+ {
+ gcc_assert (m_min == r.m_min && m_max == r.m_max);
+
+ if (this == &r)
+ return;
+
+ std::vector<SubRange> rs;
+ std::swap (rs, ranges);
+
+ for (const auto &a : rs)
+ for (const auto &b : r.ranges)
+ push (a.intersect (b));
+ }
+
+ // Complement w.r.t. the domain [m_min, m_max].
+ void invert ()
+ {
+ std::vector<SubRange> rs;
+ std::swap (rs, ranges);
+
+ if (rs.size () == 0)
+ push (m_min, m_max);
+ else
+ {
+ push (m_min, rs[0].lo - 1);
+
+ for (size_t n = 1; n < rs.size (); ++n)
+ push (rs[n - 1].hi + 1, rs[n].lo - 1);
+
+ push (rs[rs.size () - 1].hi + 1, m_max);
+ }
+ }
+
+ // Set-minus.
+ void minus (const Ranges &r)
+ {
+ gcc_assert (m_min == r.m_min && m_max == r.m_max);
+
+ Ranges sub = r;
+ sub.invert ();
+ intersect (sub);
+ }
+
+ // Union of sets. Not needed in avr.cc but added for completeness.
+ // DeMorgan this for simplicity.
+ void union_ (const Ranges &r)
+ {
+ gcc_assert (m_min == r.m_min && m_max == r.m_max);
+
+ if (this != &r)
+ {
+ invert ();
+ minus (r);
+ invert ();
+ }
+ }
+
+ // Get the truth Ranges for x <cmp> val. For example,
+ // LT 3 will return [m_min, 2].
+ Ranges truth (rtx_code cmp, T val, bool strict = true)
+ {
+ if (strict)
+ {
+ if (avr_strict_signed_p (cmp))
+ gcc_assert (m_min == -m_max - 1);
+ else if (avr_strict_unsigned_p (cmp))
+ gcc_assert (m_min == 0);
+
+ gcc_assert (IN_RANGE (val, m_min, m_max));
+ }
+
+ bool rev = cmp == NE || cmp == LTU || cmp == LT || cmp == GTU || cmp == GT;
+ if (rev)
+ cmp = reverse_condition (cmp);
+
+ T lo = m_min;
+ T hi = m_max;
+
+ if (cmp == EQ)
+ lo = hi = val;
+ else if (cmp == LEU || cmp == LE)
+ hi = val;
+ else if (cmp == GEU || cmp == GE)
+ lo = val;
+ else
+ gcc_unreachable ();
+
+ Ranges rs (m_min, m_max, Ranges::EMPTY);
+ rs.push (lo, hi);
+
+ if (rev)
+ rs.invert ();
+
+ return rs;
+ }
+
+}; // struct Ranges
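+
+// A minimal usage sketch (not taken from this patch; it assumes only the
+// rtx_code values and the avr_strict_*_p helpers referenced above):
+//
+//   // Domain = uint8_t, initially the full interval [0, 255].
+//   Ranges r = Ranges::NBitsRanges (8, true /*unsigned_p*/, Ranges::ALL);
+//
+//   // Keep only the values with 16 <= x <= 100.
+//   r.intersect (r.truth (GEU, 16));    // r = { [16, 255] }
+//   r.intersect (r.truth (LEU, 100));   // r = { [16, 100] }
+//
+//   // Complement w.r.t. the domain [0, 255]: { [0, 15], [101, 255] }.
+//   r.invert ();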
diff --git a/gcc/config/avr/specs.h b/gcc/config/avr/specs.h
index 0ccc37b..a1e2c38 100644
--- a/gcc/config/avr/specs.h
+++ b/gcc/config/avr/specs.h
@@ -1,4 +1,4 @@
-/* Specs definitions for Atmel AVR back end.
+/* Specs definitions for AVR 8-bit microcontrollers.
Copyright (C) 2012-2024 Free Software Foundation, Inc.
Contributed by Georg-Johann Lay (avr@gjlay.de)
diff --git a/gcc/config/avr/stdfix.h b/gcc/config/avr/stdfix.h
index e130a26..9a594f9 100644
--- a/gcc/config/avr/stdfix.h
+++ b/gcc/config/avr/stdfix.h
@@ -108,12 +108,12 @@ typedef long long unsigned int uint_uk_t;
/* The Embedded-C paper specifies results only for rounding points
0 < RP < FBIT
-
+
As an extension, the following functions work as expected
with rounding points
-IBIT < RP < FBIT
-
+
For example, rounding an accum with a rounding point of -1 will
result in an even integer value. */
diff --git a/gcc/config/avr/t-avr b/gcc/config/avr/t-avr
index 449512a..3da1328 100644
--- a/gcc/config/avr/t-avr
+++ b/gcc/config/avr/t-avr
@@ -59,8 +59,14 @@ avr-log.o: $(srcdir)/config/avr/avr-log.cc \
$(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(INPUT_H) dumpfile.h
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+avr-passes.o: $(srcdir)/config/avr/avr-passes.cc \
+ $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) $(TREE_H) $(INPUT_H)
+ $(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<
+
avr.o avr-c.o: $(srcdir)/config/avr/builtins.def
+avr-passes.o: $(srcdir)/config/avr/ranges.h
+
# This overrides stdfix.h from USER_H which we supply and include
# in our own stdfix.h as stdfix-gcc.h.
diff --git a/gcc/config/bfin/bfin-protos.h b/gcc/config/bfin/bfin-protos.h
index bd49329..053fc4a 100644
--- a/gcc/config/bfin/bfin-protos.h
+++ b/gcc/config/bfin/bfin-protos.h
@@ -71,7 +71,7 @@ extern char *bfin_asm_long (void);
extern char *bfin_asm_short (void);
extern int log2constp (unsigned HOST_WIDE_INT);
-extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx);
+extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx);
extern HOST_WIDE_INT bfin_initial_elimination_offset (int, int);
extern int effective_address_32bit_p (rtx, machine_mode);
diff --git a/gcc/config/bfin/bfin.cc b/gcc/config/bfin/bfin.cc
index 3e40f2c..13d2e10 100644
--- a/gcc/config/bfin/bfin.cc
+++ b/gcc/config/bfin/bfin.cc
@@ -97,14 +97,14 @@ bfin_globalize_label (FILE *stream, const char *name)
fputc ('\n',stream);
}
-static void
-output_file_start (void)
+static void
+output_file_start (void)
{
FILE *file = asm_out_file;
int i;
fprintf (file, ".file \"%s\";\n", LOCATION_FILE (input_location));
-
+
for (i = 0; arg_regs[i] >= 0; i++)
;
max_arg_registers = i; /* how many arg reg used */
@@ -417,7 +417,7 @@ expand_prologue_reg_save (rtx spreg, int saveall, bool is_inthandler)
}
}
for (i = REG_P7 + 1; i < REG_CC; i++)
- if (saveall
+ if (saveall
|| (is_inthandler
&& (df_regs_ever_live_p (i)
|| (!leaf_function_p () && call_used_or_fixed_reg_p (i)))))
@@ -548,7 +548,7 @@ expand_epilogue_reg_restore (rtx spreg, bool saveall, bool is_inthandler)
it.
Normally, this macro will push all remaining incoming registers on the
- stack and set PRETEND_SIZE to the length of the registers pushed.
+ stack and set PRETEND_SIZE to the length of the registers pushed.
Blackfin specific :
- VDSP C compiler manual (our ABI) says that a variable args function
@@ -590,7 +590,7 @@ setup_incoming_varargs (cumulative_args_t cum,
be accessed via the stack pointer) in functions that seem suitable. */
static bool
-bfin_frame_pointer_required (void)
+bfin_frame_pointer_required (void)
{
e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
@@ -906,7 +906,7 @@ do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all, int epilogue_p)
if (stack_frame_needed_p ())
emit_insn (gen_unlink ());
- else
+ else
{
rtx postinc = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, spreg));
@@ -968,7 +968,7 @@ expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind, bool all)
emit_insn (gen_movsi_low (p5reg, p5reg, chipid));
emit_insn (gen_dummy_load (p5reg, bfin_cc_rtx));
}
-
+
if (lookup_attribute ("nesting", attrs))
{
rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
@@ -1046,7 +1046,7 @@ bfin_load_pic_reg (rtx dest)
pic reg, since the caller always passes a usable one. */
if (local_info_node && local_info_node->local)
return pic_offset_table_rtx;
-
+
if (OPTION_SET_P (bfin_library_id))
addr = plus_constant (Pmode, pic_offset_table_rtx,
-4 - bfin_library_id * 4);
@@ -1236,7 +1236,7 @@ bfin_delegitimize_address (rtx orig_x)
32-bit instruction. */
int
-effective_address_32bit_p (rtx op, machine_mode mode)
+effective_address_32bit_p (rtx op, machine_mode mode)
{
HOST_WIDE_INT offset;
@@ -1312,7 +1312,7 @@ print_address_operand (FILE *file, rtx x)
case PRE_DEC:
fprintf (file, "--");
- output_address (VOIDmode, XEXP (x, 0));
+ output_address (VOIDmode, XEXP (x, 0));
break;
case POST_INC:
output_address (VOIDmode, XEXP (x, 0));
@@ -1390,7 +1390,7 @@ print_operand (FILE *file, rtx x, char code)
output_operand_lossage ("invalid %%j value");
}
break;
-
+
case 'J': /* reverse logic */
switch (GET_CODE(x))
{
@@ -1491,7 +1491,7 @@ print_operand (FILE *file, rtx x, char code)
else
output_operand_lossage ("invalid operand for code '%c'", code);
}
- else
+ else
fprintf (file, "%s", reg_names[REGNO (x)]);
break;
@@ -1620,7 +1620,7 @@ print_operand (FILE *file, rtx x, char code)
/* Initialize a variable CUM of type CUMULATIVE_ARGS
for a call to a function whose data type is FNTYPE.
- For a library call, FNTYPE is 0.
+ For a library call, FNTYPE is 0.
VDSP C Compiler manual, our ABI says that
first 3 words of arguments will use R0, R1 and R2.
*/
@@ -1718,7 +1718,7 @@ bfin_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
{
int bytes = arg.promoted_size_in_bytes ();
int bytes_left = get_cumulative_args (cum)->nregs * UNITS_PER_WORD;
-
+
if (bytes == -1)
return 0;
@@ -1759,7 +1759,7 @@ bfin_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
/* Return true when register may be used to pass function parameters. */
-bool
+bool
function_arg_regno_p (int n)
{
int i;
@@ -2701,7 +2701,7 @@ bfin_valid_reg_p (unsigned int regno, int strict, machine_mode mode,
/* Recognize an RTL expression that is a valid memory address for an
instruction. The MODE argument is the machine mode for the MEM expression
- that wants to use this address.
+ that wants to use this address.
Blackfin addressing modes are as follows:
@@ -2710,7 +2710,7 @@ bfin_valid_reg_p (unsigned int regno, int strict, machine_mode mode,
B [ Preg + uimm15 ]
W [ Preg + uimm16m2 ]
- [ Preg + uimm17m4 ]
+ [ Preg + uimm17m4 ]
[preg++]
[preg--]
@@ -2888,8 +2888,8 @@ bfin_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
else
*total = cost2;
return true;
-
- case ASHIFT:
+
+ case ASHIFT:
case ASHIFTRT:
case LSHIFTRT:
if (mode == DImode)
@@ -2904,7 +2904,7 @@ bfin_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
*total += rtx_cost (op0, mode, code, 0, speed);
return true;
-
+
case IOR:
case AND:
case XOR:
@@ -3152,11 +3152,11 @@ output_push_multiple (rtx insn, rtx *operands)
{
char buf[80];
int ok;
-
+
/* Validate the insn again, and compute first_[dp]reg_to_save. */
ok = analyze_push_multiple_operation (PATTERN (insn));
gcc_assert (ok);
-
+
if (first_dreg_to_save == 8)
sprintf (buf, "[--sp] = ( p5:%d );\n", first_preg_to_save);
else if (first_preg_to_save == 6)
@@ -3176,7 +3176,7 @@ output_pop_multiple (rtx insn, rtx *operands)
{
char buf[80];
int ok;
-
+
/* Validate the insn again, and compute first_[dp]reg_to_save. */
ok = analyze_pop_multiple_operation (PATTERN (insn));
gcc_assert (ok);
@@ -3856,7 +3856,7 @@ static void
hwloop_fail (hwloop_info loop)
{
rtx insn = loop->loop_end;
-
+
if (DPREG_P (loop->iter_reg))
{
/* If loop->iter_reg is a DREG or PREG, we can split it here
@@ -3880,7 +3880,7 @@ hwloop_fail (hwloop_info loop)
}
else
{
- splitting_loops = 1;
+ splitting_loops = 1;
try_split (PATTERN (insn), safe_as_a <rtx_insn *> (insn), 1);
splitting_loops = 0;
}
@@ -4132,7 +4132,7 @@ workaround_rts_anomaly (void)
if (BARRIER_P (insn))
return;
-
+
if (NOTE_P (insn) || LABEL_P (insn))
continue;
@@ -4286,7 +4286,7 @@ indirect_call_p (rtx pat)
pat = XEXP (pat, 0);
gcc_assert (GET_CODE (pat) == MEM);
pat = XEXP (pat, 0);
-
+
return REG_P (pat);
}
@@ -4329,7 +4329,7 @@ workaround_speculation (void)
int delay_needed = 0;
next = find_next_insn_start (insn);
-
+
if (NOTE_P (insn) || BARRIER_P (insn))
continue;
if (JUMP_TABLE_DATA_P (insn))
@@ -4344,7 +4344,7 @@ workaround_speculation (void)
pat = PATTERN (insn);
if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER)
continue;
-
+
if (GET_CODE (pat) == ASM_INPUT || asm_noperands (pat) >= 0)
{
np_check_regno = -1;
@@ -4603,7 +4603,7 @@ add_sched_insns_for_speculation (void)
if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE
&& get_attr_type (next) == TYPE_STALL)
continue;
- emit_insn_before (gen_stall (GEN_INT (1)), next);
+ emit_insn_before (gen_stall (GEN_INT (1)), next);
}
}
}
@@ -4719,7 +4719,7 @@ bfin_comp_type_attributes (const_tree type1, const_tree type2)
if (kind1 != kind2)
return 0;
-
+
/* Check for mismatched modifiers */
if (!lookup_attribute ("nesting", TYPE_ATTRIBUTES (type1))
!= !lookup_attribute ("nesting", TYPE_ATTRIBUTES (type2)))
@@ -4744,9 +4744,9 @@ bfin_comp_type_attributes (const_tree type1, const_tree type2)
struct attribute_spec.handler. */
static tree
-bfin_handle_longcall_attribute (tree *node, tree name,
- tree args ATTRIBUTE_UNUSED,
- int flags ATTRIBUTE_UNUSED,
+bfin_handle_longcall_attribute (tree *node, tree name,
+ tree args ATTRIBUTE_UNUSED,
+ int flags ATTRIBUTE_UNUSED,
bool *no_add_attrs)
{
if (TREE_CODE (*node) != FUNCTION_TYPE
@@ -5154,7 +5154,7 @@ bfin_init_builtins (void)
= build_function_type_list (integer_type_node,
build_pointer_type (integer_type_node),
NULL_TREE);
-
+
/* Add the remaining MMX insns with somewhat more complicated types. */
def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC);
def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC);
@@ -5746,7 +5746,7 @@ bfin_conditional_register_usage (void)
#define TARGET_EXPAND_BUILTIN bfin_expand_builtin
#undef TARGET_ASM_GLOBALIZE_LABEL
-#define TARGET_ASM_GLOBALIZE_LABEL bfin_globalize_label
+#define TARGET_ASM_GLOBALIZE_LABEL bfin_globalize_label
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START output_file_start
diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h
index e957c31..ef0ba70 100644
--- a/gcc/config/bfin/bfin.h
+++ b/gcc/config/bfin/bfin.h
@@ -295,10 +295,10 @@ extern const char *bfin_library_id_string;
/* Define this if the above stack space is to be considered part of the
* space allocated by the caller. */
#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
-
+
/* Define this if the maximum size of all the outgoing args is to be
accumulated and pushed during the prologue. The amount can be
- found in the variable crtl->outgoing_args_size. */
+ found in the variable crtl->outgoing_args_size. */
#define ACCUMULATE_OUTGOING_ARGS 1
/*#define DATA_ALIGNMENT(TYPE, BASIC-ALIGN) for arrays.. */
@@ -876,11 +876,11 @@ typedef struct {
#define DEFAULT_SIGNED_CHAR 1
/* FLOAT_TYPE_SIZE get poisoned, so add BFIN_ prefix. */
#define BFIN_FLOAT_TYPE_SIZE BITS_PER_WORD
-#define SHORT_TYPE_SIZE 16
+#define SHORT_TYPE_SIZE 16
#define CHAR_TYPE_SIZE 8
#define INT_TYPE_SIZE 32
#define LONG_TYPE_SIZE 32
-#define LONG_LONG_TYPE_SIZE 64
+#define LONG_LONG_TYPE_SIZE 64
/* Note: Fix this to depend on target switch. -- lev */
@@ -943,7 +943,7 @@ typedef struct {
#define JUMP_TABLES_IN_TEXT_SECTION flag_pic
/* Define if operations between registers always perform the operation
- on the full register even if a narrower mode is specified.
+ on the full register even if a narrower mode is specified.
#define WORD_REGISTER_OPERATIONS 1
*/
@@ -1095,7 +1095,7 @@ extern rtx bfin_cc_rtx, bfin_rets_rtx;
#define SET_ASM_OP ".set "
/* Debugger register number for a given compiler register number */
-#define DEBUGGER_REGNO(REGNO) (REGNO)
+#define DEBUGGER_REGNO(REGNO) (REGNO)
#define SIZE_ASM_OP "\t.size\t"
diff --git a/gcc/config/bpf/bpf.cc b/gcc/config/bpf/bpf.cc
index 2051fa5..aa00d14 100644
--- a/gcc/config/bpf/bpf.cc
+++ b/gcc/config/bpf/bpf.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/bpf/btfext-out.cc b/gcc/config/bpf/btfext-out.cc
index b3df7b5..ca6241a 100644
--- a/gcc/config/bpf/btfext-out.cc
+++ b/gcc/config/bpf/btfext-out.cc
@@ -19,6 +19,7 @@
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/bpf/core-builtins.cc b/gcc/config/bpf/core-builtins.cc
index 86e2e9d..deb368a 100644
--- a/gcc/config/bpf/core-builtins.cc
+++ b/gcc/config/bpf/core-builtins.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/c6x/c6x.cc b/gcc/config/c6x/c6x.cc
index 4ea3a1e..20a1666 100644
--- a/gcc/config/c6x/c6x.cc
+++ b/gcc/config/c6x/c6x.cc
@@ -6398,7 +6398,7 @@ c6x_init_builtins (void)
tree v2si_ftype_v2hi_v2hi
= build_function_type_list (V2SI_type_node, V2HI_type_node,
V2HI_type_node, NULL_TREE);
-
+
def_builtin ("__builtin_c6x_sadd", int_ftype_int_int,
C6X_BUILTIN_SADD);
def_builtin ("__builtin_c6x_ssub", int_ftype_int_int,
diff --git a/gcc/config/c6x/c6x.md b/gcc/config/c6x/c6x.md
index 5964dd6..ea9ffe8 100644
--- a/gcc/config/c6x/c6x.md
+++ b/gcc/config/c6x/c6x.md
@@ -3082,7 +3082,7 @@
;; Widening vector multiply and dot product.
;; See c6x-mult.md.in for the define_insn patterns
-(define_expand "sdot_prodv2hi"
+(define_expand "sdot_prodsiv2hi"
[(match_operand:SI 0 "register_operand" "")
(match_operand:V2HI 1 "register_operand" "")
(match_operand:V2HI 2 "register_operand" "")
diff --git a/gcc/config/cris/cris.cc b/gcc/config/cris/cris.cc
index 617fc0a..8173f85 100644
--- a/gcc/config/cris/cris.cc
+++ b/gcc/config/cris/cris.cc
@@ -2279,7 +2279,7 @@ cris_side_effect_mode_ok (enum rtx_code code, rtx *ops,
/* Queue an .ident string in the queue of top-level asm statements.
If the front-end is done, we must be being called from toplev.cc.
In that case, do nothing. */
-void
+void
cris_asm_output_ident (const char *string)
{
if (symtab->state != PARSING)
@@ -3597,7 +3597,7 @@ cris_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
if (for_return == 1)
return mode;
return CRIS_PROMOTED_MODE (mode, *punsignedp, type);
-}
+}
/* Atomic types require alignment to be at least their "natural" size. */
diff --git a/gcc/config/cris/cris.md b/gcc/config/cris/cris.md
index c15395b..55f4d10 100644
--- a/gcc/config/cris/cris.md
+++ b/gcc/config/cris/cris.md
@@ -2418,7 +2418,7 @@
(pc)))]
"reload_completed"
{
- return <MODE>mode == CC_NZmode ? "b<oCC> %l0%#": "b<CC> %l0%#";
+ return <MODE>mode == CC_NZmode ? "b<oCC> %l0%#" : "b<CC> %l0%#";
}
[(set_attr "slottable" "has_slot")])
@@ -3024,6 +3024,7 @@
;; Re-compose a decomposed "indirect offset" address for a szext
;; operation. The non-clobbering "addi" is generated by LRA.
;; This and lra_szext_decomposed is covered by cris/rld-legit1.c.
+;; (Unfortunately not true when enabling late-combine.)
(define_peephole2 ; lra_szext_decomposed_indirect_with_offset
[(parallel
[(set (match_operand:SI 0 "register_operand")
@@ -3046,6 +3047,50 @@
(mem:BW2 (plus:SI (szext:SI (mem:BW (match_dup 1))) (match_dup 2)))))
(clobber (reg:CC CRIS_CC0_REGNUM))])])
+;; When enabling late-combine, we get a slightly changed register
+;; allocation. The two allocations for the pseudo-registers involved
+;; in the matching pattern get "swapped" and the (plus ...) in the
+;; pattern above is now a load from a stack-slot. If peephole2 is
+;; disabled, we see that the original sequence is actually improved;
+;; one less incoming instruction, a load. We need to "undo" that
+;; improvement a bit and move that load "back" to before the sequence
+;; we combine in lra_szext_decomposed_indirect_with_offset. But that
+;; changed again, so there's no define_peephole2 for that sequence
+;; here, because it'd be hard or impossible to write a matching
+;; test-case. A few commits later, the incoming pattern sequence has
+;; changed again: back to the original but with the (plus...) part of
+;; the address inside the second memory reference.
+;; Coverage: cris/rld-legit1.c@r15-1880-gce34fcc572a0dc or
+;; r15-3386-gaf1500dd8c00 when adding -flate-combine-instructions.
+
+(define_peephole2 ; lra_szext_decomposed_indir_plus
+ [(parallel
+ [(set (match_operand:SI 0 "register_operand")
+ (sign_extend:SI (mem:BW (match_operand:SI 1 "register_operand"))))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])
+ (parallel
+ [(set (match_operand:SI 3 "register_operand")
+ (szext:SI (mem:BW2 (plus:SI
+ (match_operand:SI 4 "register_operand")
+ (match_operand:SI 2 "register_operand")))))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+ "(REGNO (operands[0]) == REGNO (operands[3])
+ || peep2_reg_dead_p (3, operands[0]))
+ && (REGNO (operands[0]) == REGNO (operands[1])
+ || peep2_reg_dead_p (3, operands[0]))
+ && (rtx_equal_p (operands[2], operands[0])
+ || rtx_equal_p (operands[4], operands[0]))"
+ [(parallel
+ [(set
+ (match_dup 3)
+ (szext:SI
+ (mem:BW2 (plus:SI (szext:SI (mem:BW (match_dup 1))) (match_dup 2)))))
+ (clobber (reg:CC CRIS_CC0_REGNUM))])]
+{
+ if (! rtx_equal_p (operands[4], operands[0]))
+ operands[2] = operands[4];
+})
+
;; Add operations with similar or same decomposed addresses here, when
;; encountered - but only when covered by mentioned test-cases for at
;; least one of the cases generalized in the pattern.
diff --git a/gcc/config/darwin-c.cc b/gcc/config/darwin-c.cc
index aaec1c3..69f3d0a 100644
--- a/gcc/config/darwin-c.cc
+++ b/gcc/config/darwin-c.cc
@@ -733,7 +733,7 @@ darwin_cpp_builtins (cpp_reader *pfile)
/* Since we do not (at 4.6) support ObjC gc for the NeXT runtime, the
following will cause a syntax error if one tries to compile gc attributed
- items. However, without this, NeXT system headers cannot be parsed
+ items. However, without this, NeXT system headers cannot be parsed
properly (on systems >= darwin 9). */
if (flag_objc_gc)
{
@@ -805,24 +805,24 @@ darwin_cfstring_ref_p (const_tree strp)
return false;
tn = TYPE_NAME (strp);
- if (tn)
+ if (tn)
tn = DECL_NAME (tn);
- return (tn
+ return (tn
&& IDENTIFIER_POINTER (tn)
&& startswith (IDENTIFIER_POINTER (tn), "CFStringRef"));
}
/* At present the behavior of this is undefined and it does nothing. */
static void
-darwin_check_cfstring_format_arg (tree ARG_UNUSED (format_arg),
+darwin_check_cfstring_format_arg (tree ARG_UNUSED (format_arg),
tree ARG_UNUSED (args_list))
{
}
/* The extra format types we recognize. */
EXPORTED_CONST format_kind_info darwin_additional_format_types[] = {
- { "CFString", NULL, NULL, NULL, NULL,
- NULL, NULL,
+ { "CFString", NULL, NULL, NULL, NULL,
+ NULL, NULL,
FMT_FLAG_ARG_CONVERT|FMT_FLAG_PARSE_ARG_CONVERT_EXTERNAL, 0, 0, 0, 0, 0, 0,
NULL, NULL
}
diff --git a/gcc/config/darwin-driver.cc b/gcc/config/darwin-driver.cc
index eabe9bc..2aa0b0c 100644
--- a/gcc/config/darwin-driver.cc
+++ b/gcc/config/darwin-driver.cc
@@ -191,8 +191,8 @@ darwin_find_version_from_kernel (void)
/* When running on a Darwin system and using that system's headers and
libraries, default the -mmacosx-version-min flag to be the version
- of the system on which the compiler is running.
-
+ of the system on which the compiler is running.
+
When building cross or native cross compilers, default to the OSX
version of the target (as provided by the most specific target header
included in tm.h). This may be overidden by setting the flag explicitly
@@ -287,7 +287,7 @@ darwin_driver_init (unsigned int *decoded_options_count,
case OPT_arch:
/* Support provision of a single -arch xxxx flag as a means of
specifying the sub-target/multi-lib. Translate this into -m32/64
- as appropriate. */
+ as appropriate. */
if (!strcmp ((*decoded_options)[i].arg, "i386"))
seenX86 = true;
else if (!strcmp ((*decoded_options)[i].arg, "x86_64"))
@@ -307,7 +307,7 @@ darwin_driver_init (unsigned int *decoded_options_count,
* sizeof (struct cl_decoded_option)));
}
--i;
- --*decoded_options_count;
+ --*decoded_options_count;
break;
case OPT_m32:
@@ -370,7 +370,7 @@ darwin_driver_init (unsigned int *decoded_options_count,
{
if (seenX86_64 || seenM64)
{
- const char *op = (seenX86_64? "-arch x86_64": "-m64");
+ const char *op = (seenX86_64 ? "-arch x86_64" : "-m64");
warning (0, "%qs conflicts with %<-arch i386%> (%qs ignored)",
op, op);
}
@@ -384,7 +384,7 @@ darwin_driver_init (unsigned int *decoded_options_count,
" (%<-m32%> ignored)");
if (! seenM64) /* Add -m64 if the User didn't. */
appendM64 = true;
- }
+ }
#elif DARWIN_PPC
if (seenX86 || seenX86_64)
warning (0, "this compiler does not support x86"
@@ -393,7 +393,7 @@ darwin_driver_init (unsigned int *decoded_options_count,
{
if (seenPPC64 || seenM64)
{
- const char *op = (seenPPC64? "-arch ppc64": "-m64");
+ const char *op = (seenPPC64 ? "-arch ppc64" : "-m64");
warning (0, "%qs conflicts with %<-arch ppc%> (%qs ignored)",
op, op);
}
diff --git a/gcc/config/darwin-protos.h b/gcc/config/darwin-protos.h
index b67e052..523d7db 100644
--- a/gcc/config/darwin-protos.h
+++ b/gcc/config/darwin-protos.h
@@ -59,7 +59,7 @@ extern void darwin_set_default_type_attributes (tree);
extern int machopic_reloc_rw_mask (void);
extern section *machopic_select_section (tree, int, unsigned HOST_WIDE_INT);
-extern section *darwin_function_section (tree, enum node_frequency, bool, bool);
+extern section *darwin_function_section (tree, enum node_frequency, bool, bool);
extern section *darwin_tm_clone_table_section (void);
extern void darwin_function_switched_text_sections (FILE *, tree, bool);
@@ -105,11 +105,11 @@ extern void darwin_asm_declare_constant_name (FILE *, const char *,
extern void darwin_output_aligned_bss (FILE *, tree, const char *,
unsigned HOST_WIDE_INT, unsigned int);
-extern void darwin_asm_output_aligned_decl_local (FILE *, tree, const char *,
- unsigned HOST_WIDE_INT,
+extern void darwin_asm_output_aligned_decl_local (FILE *, tree, const char *,
+ unsigned HOST_WIDE_INT,
unsigned int);
extern void darwin_asm_output_aligned_decl_common (FILE *, tree, const char *,
- unsigned HOST_WIDE_INT,
+ unsigned HOST_WIDE_INT,
unsigned int);
extern bool darwin_binds_local_p (const_tree);
diff --git a/gcc/config/darwin.cc b/gcc/config/darwin.cc
index 9129378..ae821e3 100644
--- a/gcc/config/darwin.cc
+++ b/gcc/config/darwin.cc
@@ -18,6 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -2403,7 +2404,7 @@ darwin_asm_declare_object_name (FILE *file,
#ifdef DEBUG_DARWIN_MEM_ALLOCATORS
fprintf (file, "# dadon: %s %s (%llu, %u) local %d weak %d"
" stat %d com %d pub %d t-const %d t-ro %d init %lx\n",
- xname, (TREE_CODE (decl) == VAR_DECL?"var":"const"),
+ xname, TREE_CODE (decl) == VAR_DECL ? "var" : "const",
(unsigned long long)size, DECL_ALIGN (decl), local_def,
DECL_WEAK (decl), TREE_STATIC (decl), DECL_COMMON (decl),
TREE_PUBLIC (decl), TREE_CONSTANT (decl), TREE_READONLY (decl),
@@ -2641,7 +2642,7 @@ darwin_emit_common (FILE *fp, const char *name,
fputs ("\t.comm\t", fp);
assemble_name (fp, name);
fprintf (fp, "," HOST_WIDE_INT_PRINT_UNSIGNED,
- emit_aligned_common?size:rounded);
+ emit_aligned_common ? size : rounded);
if (l2align && emit_aligned_common)
fprintf (fp, ",%u", l2align);
fputs ("\n", fp);
diff --git a/gcc/config/darwin.h b/gcc/config/darwin.h
index 3775990..d2a8061 100644
--- a/gcc/config/darwin.h
+++ b/gcc/config/darwin.h
@@ -648,6 +648,8 @@ extern GTY(()) int darwin_ms_struct;
#define ASM_OPTIONS "%{v} %{w:-W} %{I*}"
#endif
+#define AS_NEEDS_DASH_FOR_PIPED_INPUT
+
/* Default Darwin ASM_SPEC, very simple. */
#define ASM_SPEC \
"%{static} -arch %(darwin_arch) " \
@@ -850,7 +852,7 @@ ASM_OPTIONS ASM_MMACOSX_VERSION_MIN_SPEC
#define TARGET_ASM_DECLARE_CONSTANT_NAME darwin_asm_declare_constant_name
/* Wrap new method names in quotes so the assembler doesn't gag.
- Make Objective-C internal symbols local and in doing this, we need
+ Make Objective-C internal symbols local and in doing this, we need
to accommodate the name mangling done by c++ on file scope locals. */
int darwin_label_is_anonymous_local_objc_name (const char *name);
@@ -1207,7 +1209,7 @@ void add_framework_path (char *);
#undef GTM_SELF_SPECS
#define GTM_SELF_SPECS ""
-/* Darwin disables section anchors by default.
+/* Darwin disables section anchors by default.
They should be enabled per arch where support exists in that arch. */
#define TARGET_ASM_OUTPUT_ANCHOR NULL
#define DARWIN_SECTION_ANCHORS 0
@@ -1238,7 +1240,7 @@ extern void darwin_driver_init (unsigned int *,struct cl_decoded_option **);
#undef STACK_CHECK_STATIC_BUILTIN
#define STACK_CHECK_STATIC_BUILTIN 1
-/* When building cross-compilers (and native crosses) we shall default to
+/* When building cross-compilers (and native crosses) we shall default to
providing an osx-version-min of this unless overridden by the User.
10.5 is the only version that fully supports all our archs so that's the
fall-back default. */
diff --git a/gcc/config/elfos.h b/gcc/config/elfos.h
index 1338815..ea56a5c 100644
--- a/gcc/config/elfos.h
+++ b/gcc/config/elfos.h
@@ -43,10 +43,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
#undef USER_LABEL_PREFIX
#define USER_LABEL_PREFIX ""
-/* The biggest alignment supported by ELF in bits. 32-bit ELF
- supports section alignment up to (0x80000000 * 8), while
- 64-bit ELF supports (0x8000000000000000 * 8). If this macro
- is not defined, the default is the largest alignment supported
+/* The biggest alignment supported by ELF in bits. 32-bit ELF
+ supports section alignment up to (0x80000000 * 8), while
+ 64-bit ELF supports (0x8000000000000000 * 8). If this macro
+ is not defined, the default is the largest alignment supported
by 32-bit ELF and representable on a 32-bit host. Use this
macro to limit the alignment which can be specified using
the `__attribute__ ((aligned (N)))' construct. */
diff --git a/gcc/config/epiphany/epiphany.cc b/gcc/config/epiphany/epiphany.cc
index 79254c2..56e7cf9 100644
--- a/gcc/config/epiphany/epiphany.cc
+++ b/gcc/config/epiphany/epiphany.cc
@@ -830,7 +830,7 @@ epiphany_rtx_costs (rtx x, machine_mode mode, int outer_code,
return false;
}
-
+
case SET:
{
rtx src = SET_SRC (x);
diff --git a/gcc/config/epiphany/epiphany.md b/gcc/config/epiphany/epiphany.md
index 395ddd5..af6cd39 100644
--- a/gcc/config/epiphany/epiphany.md
+++ b/gcc/config/epiphany/epiphany.md
@@ -2187,14 +2187,14 @@
if (epiphany_uninterruptible_p (current_function_decl)
!= target_uninterruptible)
{
- emit_insn (target_uninterruptible ? gen_gid (): gen_gie ());
+ emit_insn (target_uninterruptible ? gen_gid () : gen_gie ());
emit_call_insn
(gen_rtx_PARALLEL
(VOIDmode,
gen_rtvec (2, gen_rtx_CALL (VOIDmode, operands[0], operands[1]),
gen_rtx_CLOBBER (VOIDmode,
gen_rtx_REG (SImode, GPR_LR)))));
- emit_insn (target_uninterruptible ? gen_gie (): gen_gid ());
+ emit_insn (target_uninterruptible ? gen_gie () : gen_gid ());
DONE;
}
})
@@ -2225,13 +2225,13 @@
if (epiphany_uninterruptible_p (current_function_decl)
!= target_uninterruptible)
{
- emit_insn (target_uninterruptible ? gen_gid (): gen_gie ());
+ emit_insn (target_uninterruptible ? gen_gid () : gen_gie ());
emit_call_insn
(gen_rtx_PARALLEL
(VOIDmode,
gen_rtvec (2, gen_rtx_CALL (VOIDmode, operands[0], operands[1]),
ret_rtx)));
- emit_insn (target_uninterruptible ? gen_gie (): gen_gid ());
+ emit_insn (target_uninterruptible ? gen_gie () : gen_gid ());
DONE;
}
})
@@ -2264,7 +2264,7 @@
if (epiphany_uninterruptible_p (current_function_decl)
!= target_uninterruptible)
{
- emit_insn (target_uninterruptible ? gen_gid (): gen_gie ());
+ emit_insn (target_uninterruptible ? gen_gid () : gen_gie ());
emit_call_insn
(gen_rtx_PARALLEL
(VOIDmode,
@@ -2273,7 +2273,7 @@
gen_rtx_CALL (VOIDmode, operands[1], operands[2])),
gen_rtx_CLOBBER (VOIDmode,
gen_rtx_REG (SImode, GPR_LR)))));
- emit_insn (target_uninterruptible ? gen_gie (): gen_gid ());
+ emit_insn (target_uninterruptible ? gen_gie () : gen_gid ());
DONE;
}
})
@@ -2307,7 +2307,7 @@
if (epiphany_uninterruptible_p (current_function_decl)
!= target_uninterruptible)
{
- emit_insn (target_uninterruptible ? gen_gid (): gen_gie ());
+ emit_insn (target_uninterruptible ? gen_gid () : gen_gie ());
emit_call_insn
(gen_rtx_PARALLEL
(VOIDmode,
@@ -2315,7 +2315,7 @@
(operands[0],
gen_rtx_CALL (VOIDmode, operands[1], operands[2])),
ret_rtx)));
- emit_insn (target_uninterruptible ? gen_gie (): gen_gid ());
+ emit_insn (target_uninterruptible ? gen_gie () : gen_gid ());
DONE;
}
})
diff --git a/gcc/config/fr30/fr30.cc b/gcc/config/fr30/fr30.cc
index cf93cba..f45e8fc 100644
--- a/gcc/config/fr30/fr30.cc
+++ b/gcc/config/fr30/fr30.cc
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/*{{{ Includes */
+/*{{{ Includes */
#define IN_TARGET_CODE 1
@@ -45,13 +45,13 @@
#include "target-def.h"
/*}}}*/
-/*{{{ Function Prologues & Epilogues */
+/*{{{ Function Prologues & Epilogues */
/* The FR30 stack looks like this:
Before call After call
FP ->| | | |
- +-----------------------+ +-----------------------+ high
+ +-----------------------+ +-----------------------+ high
| | | | memory
| local variables, | | local variables, |
| reg save area, etc. | | reg save area, etc. |
@@ -63,32 +63,32 @@
SP ->| do not fit in regs | | |
+-----------------------+ +-----------------------+
| args that used to be | \
- | in regs; only created | | pretend_size
- AP-> | for vararg funcs | /
- +-----------------------+
- | | \
+ | in regs; only created | | pretend_size
+ AP-> | for vararg funcs | /
+ +-----------------------+
+ | | \
| register save area | |
| | |
+-----------------------+ | reg_size
- | return address | |
+ | return address | |
+-----------------------+ |
FP ->| previous frame ptr | /
- +-----------------------+
- | | \
- | local variables | | var_size
- | | /
- +-----------------------+
- | | \
+ +-----------------------+
+ | | \
+ | local variables | | var_size
+ | | /
+ +-----------------------+
+ | | \
low | room for args to | |
- memory | other funcs called | | args_size
+ memory | other funcs called | | args_size
| from this one | |
- SP ->| | /
- +-----------------------+
-
+ SP ->| | /
+ +-----------------------+
+
Note, AP is a fake hard register. It will be eliminated in favor of
SP or FP as appropriate.
- Note, Some or all of the stack sections above may be omitted if they
+ Note, Some or all of the stack sections above may be omitted if they
are not needed. */
/* Structure to be filled in by fr30_compute_frame_size() with register
@@ -211,7 +211,7 @@ fr30_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
}
/* Returns the number of bytes offset between FROM_REG and TO_REG
- for the current function. As a side effect it fills in the
+ for the current function. As a side effect it fills in the
current_frame_info structure, if the data is available. */
unsigned int
fr30_compute_frame_size (int from_reg, int to_reg)
@@ -259,10 +259,10 @@ fr30_compute_frame_size (int from_reg, int to_reg)
/* Calculate the required distance. */
return_value = 0;
-
+
if (to_reg == STACK_POINTER_REGNUM)
return_value += args_size + var_size;
-
+
if (from_reg == ARG_POINTER_REGNUM)
return_value += reg_size;
@@ -292,7 +292,7 @@ fr30_expand_prologue (void)
if (current_frame_info.pretend_size)
{
int regs_to_save = current_frame_info.pretend_size / UNITS_PER_WORD;
-
+
/* Push argument registers into the pretend arg area. */
for (regno = FIRST_ARG_REGNUM + FR30_NUM_ARG_REGS; regno --, regs_to_save --;)
{
@@ -317,7 +317,7 @@ fr30_expand_prologue (void)
/* Save return address if necessary. */
if (current_frame_info.save_rp)
{
- insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode,
+ insn = emit_insn (gen_movsi_push (gen_rtx_REG (Pmode,
RETURN_POINTER_REGNUM)));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -329,12 +329,12 @@ fr30_expand_prologue (void)
{
int enter_size = current_frame_info.frame_size + UNITS_PER_WORD;
rtx pattern;
-
+
insn = emit_insn (gen_enter_func (GEN_INT (enter_size)));
RTX_FRAME_RELATED_P (insn) = 1;
-
+
pattern = PATTERN (insn);
-
+
/* Also mark all 3 subexpressions as RTX_FRAME_RELATED_P. */
if (GET_CODE (pattern) == PARALLEL)
{
@@ -342,7 +342,7 @@ fr30_expand_prologue (void)
for (x = XVECLEN (pattern, 0); x--;)
{
rtx part = XVECEXP (pattern, 0, x);
-
+
/* One of the insns in the ENTER pattern updates the
frame pointer. If we do not actually need the frame
pointer in this function then this is a side effect
@@ -410,7 +410,7 @@ fr30_expand_epilogue (void)
/* Perform the inversion operations of the prologue. */
gcc_assert (current_frame_info.initialised);
-
+
/* Pop local variables and arguments off the stack.
If frame_pointer_needed is TRUE then the frame pointer register
has actually been used as a frame pointer, and we can recover
@@ -433,18 +433,18 @@ fr30_expand_epilogue (void)
emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
}
}
-
+
if (current_frame_info.save_fp)
emit_insn (gen_movsi_pop (frame_pointer_rtx));
-
+
/* Pop all the registers that were pushed. */
if (current_frame_info.save_rp)
emit_insn (gen_movsi_pop (gen_rtx_REG (Pmode, RETURN_POINTER_REGNUM)));
-
+
for (regno = 0; regno < STACK_POINTER_REGNUM; regno ++)
if (current_frame_info.gmask & (1 << regno))
emit_insn (gen_movsi_pop (gen_rtx_REG (Pmode, regno)));
-
+
if (current_frame_info.pretend_size)
emit_insn (gen_add_to_stack (GEN_INT (current_frame_info.pretend_size)));
@@ -494,7 +494,7 @@ fr30_setup_incoming_varargs (cumulative_args_t arg_regs_used_so_far_v,
}
/*}}}*/
-/*{{{ Printing operands */
+/*{{{ Printing operands */
/* Print a memory address as an operand to reference that memory location. */
@@ -506,7 +506,7 @@ fr30_print_operand_address (FILE *stream, rtx address)
case SYMBOL_REF:
output_addr_const (stream, address);
break;
-
+
default:
fprintf (stderr, "code = %x\n", GET_CODE (address));
debug_rtx (address);
@@ -521,7 +521,7 @@ void
fr30_print_operand (FILE *file, rtx x, int code)
{
rtx x0;
-
+
switch (code)
{
case '#':
@@ -529,7 +529,7 @@ fr30_print_operand (FILE *file, rtx x, int code)
if (dbr_sequence_length () != 0)
fputs (":D", file);
return;
-
+
case 'p':
/* Compute the register name of the second register in a hi/lo
register pair. */
@@ -538,7 +538,7 @@ fr30_print_operand (FILE *file, rtx x, int code)
else
fprintf (file, "r%d", REGNO (x) + 1);
return;
-
+
case 'b':
/* Convert GCC's comparison operators into FR30 comparison codes. */
switch (GET_CODE (x))
@@ -558,7 +558,7 @@ fr30_print_operand (FILE *file, rtx x, int code)
break;
}
return;
-
+
case 'B':
/* Convert GCC's comparison operators into the complementary FR30
comparison codes. */
@@ -587,7 +587,7 @@ fr30_print_operand (FILE *file, rtx x, int code)
else
{
HOST_WIDE_INT val;
-
+
val = INTVAL (x);
val &= 0xff;
@@ -595,7 +595,7 @@ fr30_print_operand (FILE *file, rtx x, int code)
fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
}
return;
-
+
case 'x':
if (GET_CODE (x) != CONST_INT
|| INTVAL (x) < 16
@@ -617,11 +617,11 @@ fr30_print_operand (FILE *file, rtx x, int code)
fputs (str, file);
}
return;
-
+
case 0:
/* Handled below. */
break;
-
+
default:
fprintf (stderr, "unknown code = %x\n", code);
output_operand_lossage ("fr30_print_operand: unknown code");
@@ -636,7 +636,7 @@ fr30_print_operand (FILE *file, rtx x, int code)
case MEM:
x0 = XEXP (x,0);
-
+
switch (GET_CODE (x0))
{
case REG:
@@ -677,11 +677,11 @@ fr30_print_operand (FILE *file, rtx x, int code)
fprintf (file, "@(r15, #" HOST_WIDE_INT_PRINT_DEC ")", val);
}
break;
-
+
case SYMBOL_REF:
output_address (VOIDmode, x0);
break;
-
+
default:
fprintf (stderr, "bad MEM code = %x\n", GET_CODE (x0));
debug_rtx (x);
@@ -689,7 +689,7 @@ fr30_print_operand (FILE *file, rtx x, int code)
break;
}
break;
-
+
case CONST_DOUBLE :
/* We handle SFmode constants here as output_addr_const doesn't. */
if (GET_MODE (x) == SFmode)
@@ -740,7 +740,7 @@ fr30_function_value_regno_p (const unsigned int regno)
return (regno == RETURN_VALUE_REGNUM);
}
-/*{{{ Function arguments */
+/*{{{ Function arguments */
/* Return true if we should pass an argument on the stack rather than
in registers. */
@@ -787,7 +787,7 @@ fr30_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
register, partial stack space. */
if (*cum + fr30_num_arg_regs (arg) <= FR30_NUM_ARG_REGS)
return 0;
-
+
return (FR30_NUM_ARG_REGS - *cum) * UNITS_PER_WORD;
}
@@ -814,7 +814,7 @@ fr30_function_arg_advance (cumulative_args_t cum,
}
/*}}}*/
-/*{{{ Operand predicates */
+/*{{{ Operand predicates */
#ifndef Mmode
#define Mmode machine_mode
@@ -828,30 +828,30 @@ fr30_check_multiple_regs (rtx *operands, int num_operands, int descending)
if (descending)
{
unsigned int prev_regno = 0;
-
+
while (num_operands --)
{
if (GET_CODE (operands [num_operands]) != REG)
return 0;
-
+
if (REGNO (operands [num_operands]) < prev_regno)
return 0;
-
+
prev_regno = REGNO (operands [num_operands]);
}
}
else
{
unsigned int prev_regno = CONDITION_CODE_REGNUM;
-
+
while (num_operands --)
{
if (GET_CODE (operands [num_operands]) != REG)
return 0;
-
+
if (REGNO (operands [num_operands]) > prev_regno)
return 0;
-
+
prev_regno = REGNO (operands [num_operands]);
}
}
@@ -895,13 +895,13 @@ fr30_move_double (rtx * operands)
if (src_code == REG)
{
int reverse = (REGNO (dest) == REGNO (src) + 1);
-
+
/* We normally copy the low-numbered register first. However, if
the first register of operand 0 is the same as the second register
of operand 1, we must copy in the opposite order. */
emit_insn (gen_rtx_SET (operand_subword (dest, reverse, TRUE, mode),
operand_subword (src, reverse, TRUE, mode)));
-
+
emit_insn
(gen_rtx_SET (operand_subword (dest, !reverse, TRUE, mode),
operand_subword (src, !reverse, TRUE, mode)));
@@ -912,9 +912,9 @@ fr30_move_double (rtx * operands)
rtx dest0 = operand_subword (dest, 0, TRUE, mode);
rtx dest1 = operand_subword (dest, 1, TRUE, mode);
rtx new_mem;
-
+
gcc_assert (GET_CODE (addr) == REG);
-
+
/* Copy the address before clobbering it. See PR 34174. */
emit_insn (gen_rtx_SET (dest1, addr));
emit_insn (gen_rtx_SET (dest0, adjust_address (src, SImode, 0)));
@@ -923,7 +923,7 @@ fr30_move_double (rtx * operands)
new_mem = gen_rtx_MEM (SImode, dest1);
MEM_COPY_ATTRIBUTES (new_mem, src);
-
+
emit_insn (gen_rtx_SET (dest1, new_mem));
}
else if (src_code == CONST_INT || src_code == CONST_DOUBLE)
@@ -932,7 +932,7 @@ fr30_move_double (rtx * operands)
split_double (src, &words[0], &words[1]);
emit_insn (gen_rtx_SET (operand_subword (dest, 0, TRUE, mode),
words[0]));
-
+
emit_insn (gen_rtx_SET (operand_subword (dest, 1, TRUE, mode),
words[1]));
}
@@ -1006,7 +1006,7 @@ fr30_frame_pointer_required (void)
target are 32 bit aligned within the trampoline. That allows us to
initialize those locations with simple SImode stores. The alternative
would be to use HImode stores. */
-
+
static void
fr30_asm_trampoline_template (FILE *f)
{
diff --git a/gcc/config/fr30/fr30.h b/gcc/config/fr30/fr30.h
index 19020fb..6d071a1 100644
--- a/gcc/config/fr30/fr30.h
+++ b/gcc/config/fr30/fr30.h
@@ -1,6 +1,6 @@
-/*{{{ Comment. */
+/*{{{ Comment. */
-/* Definitions of FR30 target.
+/* Definitions of FR30 target.
Copyright (C) 1998-2024 Free Software Foundation, Inc.
Contributed by Cygnus Solutions.
@@ -21,7 +21,7 @@ along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
/*}}}*/
-/*{{{ Run-time target specifications. */
+/*{{{ Run-time target specifications. */
#undef ASM_SPEC
#define ASM_SPEC ""
@@ -56,7 +56,7 @@ along with GCC; see the file COPYING3. If not see
%{static:-Bstatic} %{shared:-shared} %{symbolic:-Bsymbolic}"
/*}}}*/
-/*{{{ Storage Layout. */
+/*{{{ Storage Layout. */
#define BITS_BIG_ENDIAN 1
@@ -93,7 +93,7 @@ along with GCC; see the file COPYING3. If not see
#define PCC_BITFIELD_TYPE_MATTERS 1
/*}}}*/
-/*{{{ Layout of Source Language Data Types. */
+/*{{{ Layout of Source Language Data Types. */
#define SHORT_TYPE_SIZE 16
#define INT_TYPE_SIZE 32
@@ -115,7 +115,7 @@ along with GCC; see the file COPYING3. If not see
#define WCHAR_TYPE_SIZE BITS_PER_WORD
/*}}}*/
-/*{{{ REGISTER BASICS. */
+/*{{{ REGISTER BASICS. */
/* Number of hardware registers known to the compiler. They receive numbers 0
through `FIRST_PSEUDO_REGISTER-1'; thus, the first pseudo register's number
@@ -139,7 +139,7 @@ along with GCC; see the file COPYING3. If not see
/* A call-used register that can be used during the function prologue. */
#define PROLOGUE_TMP_REGNUM COMPILER_SCRATCH_REGISTER
-
+
/* Register numbers used for passing a function's static chain pointer. If
register windows are used, the register number as seen by the called
function is `STATIC_CHAIN_INCOMING_REGNUM', while the register number as
@@ -161,7 +161,7 @@ along with GCC; see the file COPYING3. If not see
determines which register this is. On other machines, you can choose any
register you wish for this purpose. */
#define FRAME_POINTER_REGNUM 14
-
+
/* The register number of the stack pointer register, which must also be a
fixed register according to `FIXED_REGISTERS'. On most machines, the
hardware determines which register this is. */
@@ -231,7 +231,7 @@ along with GCC; see the file COPYING3. If not see
}
/*}}}*/
-/*{{{ Register Classes. */
+/*{{{ Register Classes. */
/* An enumeral type that must be defined with all the register class names as
enumeral values. `NO_REGS' must be first. `ALL_REGS' must be the last
@@ -332,7 +332,7 @@ enum reg_class
#define CLASS_MAX_NREGS(CLASS, MODE) targetm.hard_regno_nregs (0, MODE)
/*}}}*/
-/*{{{ Basic Stack Layout. */
+/*{{{ Basic Stack Layout. */
/* Define this macro if pushing a word onto the stack moves the stack pointer
to a smaller address. */
@@ -367,7 +367,7 @@ enum reg_class
#define INCOMING_RETURN_ADDR_RTX gen_rtx_REG (SImode, RETURN_POINTER_REGNUM)
/*}}}*/
-/*{{{ Register That Address the Stack Frame. */
+/*{{{ Register That Address the Stack Frame. */
/* The register number of the arg pointer register, which is used to access the
function's argument list. On some machines, this is the same as the frame
@@ -379,7 +379,7 @@ enum reg_class
#define ARG_POINTER_REGNUM 20
/*}}}*/
-/*{{{ Eliminating the Frame Pointer and the Arg Pointer. */
+/*{{{ Eliminating the Frame Pointer and the Arg Pointer. */
/* If defined, this macro specifies a table of register pairs used to eliminate
unneeded registers that point into the stack frame. If it is not defined,
@@ -417,7 +417,7 @@ enum reg_class
(OFFSET) = fr30_compute_frame_size (FROM, TO)
/*}}}*/
-/*{{{ Passing Function Arguments on the Stack. */
+/*{{{ Passing Function Arguments on the Stack. */
/* If defined, the maximum amount of space required for outgoing arguments will
be computed and placed into the variable
@@ -430,10 +430,10 @@ enum reg_class
#define ACCUMULATE_OUTGOING_ARGS 1
/*}}}*/
-/*{{{ Function Arguments in Registers. */
+/*{{{ Function Arguments in Registers. */
/* The number of registers assigned to holding function arguments. */
-
+
#define FR30_NUM_ARG_REGS 4
/* A C type for declaring a variable that is used as the first argument of
@@ -478,7 +478,7 @@ enum reg_class
((REGNO) >= FIRST_ARG_REGNUM && ((REGNO) < FIRST_ARG_REGNUM + FR30_NUM_ARG_REGS))
/*}}}*/
-/*{{{ How Large Values are Returned. */
+/*{{{ How Large Values are Returned. */
/* Define this macro to be 1 if all structure and union return values must be
in memory. Since this results in slower code, this should be defined only
@@ -490,7 +490,7 @@ enum reg_class
#define DEFAULT_PCC_STRUCT_RETURN 1
/*}}}*/
-/*{{{ Generating Code for Profiling. */
+/*{{{ Generating Code for Profiling. */
/* A C statement or compound statement to output to FILE some assembler code to
call the profiling subroutine `mcount'. Before calling, the assembler code
@@ -512,7 +512,7 @@ enum reg_class
}
/*}}}*/
-/*{{{ Trampolines for Nested Functions. */
+/*{{{ Trampolines for Nested Functions. */
/* A C expression for the size in bytes of the trampoline, as an integer. */
#define TRAMPOLINE_SIZE 18
@@ -523,7 +523,7 @@ enum reg_class
#define TRAMPOLINE_ALIGNMENT 32
/*}}}*/
-/*{{{ Addressing Modes. */
+/*{{{ Addressing Modes. */
/* A number, the maximum number of registers that can appear in a valid memory
address. Note that it is up to you to specify a value equal to the maximum
@@ -536,15 +536,15 @@ enum reg_class
/* On the FR30 we only have one real addressing mode - an address in a
register. There are three special cases however:
-
+
* indexed addressing using small positive offsets from the stack pointer
-
+
* indexed addressing using small signed offsets from the frame pointer
* register plus register addressing using R13 as the base register.
At the moment we only support the first two of these special cases. */
-
+
#ifdef REG_OK_STRICT
#define GO_IF_LEGITIMATE_ADDRESS(MODE, X, LABEL) \
do \
@@ -617,7 +617,7 @@ enum reg_class
#define REG_OK_FOR_INDEX_P(X) REG_OK_FOR_BASE_P (X)
/*}}}*/
-/*{{{ Describing Relative Costs of Operations */
+/*{{{ Describing Relative Costs of Operations */
/* Define this macro as a C expression which is nonzero if accessing less than
a word of memory (i.e. a `char' or a `short') is no faster than accessing a
@@ -633,7 +633,7 @@ enum reg_class
#define SLOW_BYTE_ACCESS 1
/*}}}*/
-/*{{{ Dividing the output into sections. */
+/*{{{ Dividing the output into sections. */
/* A C expression whose value is a string containing the assembler operation
that should precede instructions and read-only data. Normally `".text"' is
@@ -668,13 +668,13 @@ enum reg_class
#define ASM_APP_OFF "#NO_APP\n"
/*}}}*/
-/*{{{ Output and Generation of Labels. */
+/*{{{ Output and Generation of Labels. */
/* Globalizing directive for a label. */
#define GLOBAL_ASM_OP "\t.globl "
/*}}}*/
-/*{{{ Output of Assembler Instructions. */
+/*{{{ Output of Assembler Instructions. */
/* A C compound statement to output to stdio stream STREAM the assembler syntax
for an instruction operand X. X is an RTL expression.
@@ -713,7 +713,7 @@ enum reg_class
#define IMMEDIATE_PREFIX ""
/*}}}*/
-/*{{{ Output of Dispatch Tables. */
+/*{{{ Output of Dispatch Tables. */
/* This macro should be provided on machines where the addresses in a dispatch
table are relative to the table's own address.
@@ -741,7 +741,7 @@ fprintf (STREAM, "\t.word .L%d-.L%d\n", VALUE, REL)
fprintf (STREAM, "\t.word .L%d\n", VALUE)
/*}}}*/
-/*{{{ Assembler Commands for Alignment. */
+/*{{{ Assembler Commands for Alignment. */
/* A C statement to output to the stdio stream STREAM an assembler command to
advance the location counter to a multiple of 2 to the POWER bytes. POWER
@@ -750,7 +750,7 @@ fprintf (STREAM, "\t.word .L%d\n", VALUE)
fprintf ((STREAM), "\t.p2align %d\n", (POWER))
/*}}}*/
-/*{{{ Miscellaneous Parameters. */
+/*{{{ Miscellaneous Parameters. */
/* An alias for a machine mode name. This is the machine mode that elements of
a jump-table should have. */
diff --git a/gcc/config/freebsd-spec.h b/gcc/config/freebsd-spec.h
index f43056b..42ee998 100644
--- a/gcc/config/freebsd-spec.h
+++ b/gcc/config/freebsd-spec.h
@@ -22,10 +22,10 @@ a copy of the GCC Runtime Library Exception along with this program;
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-/* Common FreeBSD configuration.
+/* Common FreeBSD configuration.
All FreeBSD architectures should include this file, which will specify
their commonalities.
- Adapted from gcc/config/freebsd.h by
+ Adapted from gcc/config/freebsd.h by
David O'Brien <obrien@FreeBSD.org>
Loren J. Rittle <ljrittle@acm.org>. */
@@ -49,7 +49,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
/* Define the default FreeBSD-specific per-CPU hook code. */
#define FBSD_TARGET_CPU_CPP_BUILTINS() do {} while (0)
-/* Provide a CPP_SPEC appropriate for FreeBSD. We just deal with the GCC
+/* Provide a CPP_SPEC appropriate for FreeBSD. We just deal with the GCC
option `-posix', and PIC issues. */
#define FBSD_CPP_SPEC " \
@@ -58,10 +58,10 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
%{posix:-D_POSIX_SOURCE}"
/* Provide a STARTFILE_SPEC appropriate for FreeBSD. Here we add
- the magical crtbegin.o file (see crtstuff.c) which provides part
- of the support for getting C++ file-scope static object constructed
+ the magical crtbegin.o file (see crtstuff.c) which provides part
+ of the support for getting C++ file-scope static object constructed
before entering `main'. */
-
+
#define FBSD_STARTFILE_SPEC \
"%{!shared: \
%{pg:gcrt1.o%s} %{!pg:%{p:gcrt1.o%s} \
@@ -71,9 +71,9 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
crti.o%s %{static:crtbeginT.o%s;shared|pie:crtbeginS.o%s;:crtbegin.o%s}"
/* Provide a ENDFILE_SPEC appropriate for FreeBSD. Here we tack on
- the magical crtend.o file (see crtstuff.c) which provides part of
- the support for getting C++ file-scope static object constructed
- before entering `main', followed by a normal "finalizer" file,
+ the magical crtend.o file (see crtstuff.c) which provides part of
+ the support for getting C++ file-scope static object constructed
+ before entering `main', followed by a normal "finalizer" file,
`crtn.o'. */
#define FBSD_ENDFILE_SPEC \
diff --git a/gcc/config/freebsd.h b/gcc/config/freebsd.h
index 7add672..2643348 100644
--- a/gcc/config/freebsd.h
+++ b/gcc/config/freebsd.h
@@ -17,11 +17,11 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/* Common FreeBSD configuration.
+/* Common FreeBSD configuration.
All FreeBSD architectures should include this file, which will specify
their commonalities.
- Adapted from gcc/config/i386/freebsd-elf.h by
- David O'Brien <obrien@FreeBSD.org>.
+ Adapted from gcc/config/i386/freebsd-elf.h by
+ David O'Brien <obrien@FreeBSD.org>.
Further work by David O'Brien <obrien@FreeBSD.org> and
Loren J. Rittle <ljrittle@acm.org>. */
diff --git a/gcc/config/frv/frv.cc b/gcc/config/frv/frv.cc
index ac6fda6..216ad5f 100644
--- a/gcc/config/frv/frv.cc
+++ b/gcc/config/frv/frv.cc
@@ -2484,7 +2484,7 @@ frv_print_operand_address (FILE * stream, machine_mode /* mode */, rtx x)
See gcc/testsuite/gcc.dg/asm-4.c for an example. */
frv_print_operand_memory_reference (stream, x, 0);
return;
-
+
default:
break;
}
@@ -6311,7 +6311,7 @@ frv_secondary_reload_class (enum reg_class rclass,
/* This hook exists to catch the case where secondary_reload_class() is
called from init_reg_autoinc() in regclass.c - before the reload optabs
have been initialised. */
-
+
static reg_class_t
frv_secondary_reload (bool in_p, rtx x, reg_class_t reload_class_i,
machine_mode reload_mode,
@@ -6682,7 +6682,7 @@ frv_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
default:
break;
- case QUAD_REGS:
+ case QUAD_REGS:
case GPR_REGS:
case GR8_REGS:
case GR9_REGS:
diff --git a/gcc/config/ft32/ft32.cc b/gcc/config/ft32/ft32.cc
index 3c6e5fb..80345dc 100644
--- a/gcc/config/ft32/ft32.cc
+++ b/gcc/config/ft32/ft32.cc
@@ -831,19 +831,6 @@ ft32_target_case_values_threshold (void)
ft32_addr_space_legitimate_address_p
-// Enabling LRA gives the infamous
-// internal compiler error: Max. number of generated reload insns per insn is achieved (90)
-// errors e.g. when compiling sieve.c
-
-static bool
-ft32_lra_p (void)
-{
- return ft32_lra_flag;
-}
-
-#undef TARGET_LRA_P
-#define TARGET_LRA_P ft32_lra_p
-
static bool
reg_ok_for_base_p (rtx r, bool strict)
{
diff --git a/gcc/config/ft32/ft32.opt b/gcc/config/ft32/ft32.opt
index cecc548..419c82a 100644
--- a/gcc/config/ft32/ft32.opt
+++ b/gcc/config/ft32/ft32.opt
@@ -23,8 +23,8 @@ Target Mask(SIM)
Target the software simulator.
mlra
-Target Var(ft32_lra_flag) Init(0) Save
-Use LRA instead of reload.
+Target RejectNegative Ignore
+Ignored, but preserved for backward compatibility.
mnodiv
Target Mask(NODIV)
diff --git a/gcc/config/gcn/gcn-devices.def b/gcc/config/gcn/gcn-devices.def
new file mode 100644
index 0000000..1305e0f
--- /dev/null
+++ b/gcc/config/gcn/gcn-devices.def
@@ -0,0 +1,196 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is free software; you can redistribute it and/or modify it under
+ the terms of the GNU General Public License as published by the Free
+ Software Foundation; either version 3 of the License, or (at your option)
+ any later version.
+
+ This file is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+ for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+/* GCN Device Configurations.
+
+ This file contains all the device-specific information needed for both
+ GCC and Libgomp. Please respect the formatting and field comments as
+ this file is read by Awk scripts in addition to the C++ preprocessor.
+
+ To add a new device:
+ 1. Add a new GCN_DEVICE instance below.
+ 2. Add the name to the list in config.gcc.
+ 3. Allow gcn-tables.opt to regenerate.
+ 4. Implement target-specific metadata and new features using
+ PROCESSOR_<NAME> (or a new ISA feature flag).
+ 5. Consider adding to the set of device-specific tests in the libgomp
+ testsuite.
+
+ New ISA variants are defined in gcn-opts.h. Please use the feature macros
+ in any conditionals, rather than depending on specific devices or ISAs
+ directly.
+
+ GCN_DEVICE field descriptions:
+ 0 "name" (text, external)
+ Lower case device name used in -march=name, diagnostics,
+ assembler directives, etc.
+ 1 "NAME" (text, external)
+ Upper case device name used in macros.
+ 2 "ELF" (hex integer, external)
+       Magic number assigned to this device for use in elf_flags.
+ 3 "ISA" (enum gcn_isa, internal)
+ ISA variant for instruction selection, etc.
+ 4 "XNACK default" (enum hsaco_attr_type, internal)
+ Default value for the -mattr=[-+]xnack setting. May need to correspond
+ to the assembler expectations for this device.
+ 5 "SRAM_ECC default" (enum hsaco_attr_type, internal)
+ Default value for the -mattr=[-+]sram-ecc setting. Only really used
+ to ensure that the binary is in a known state mkoffload can match.
+ 6 "WAVE64 mode" (enum hsaco_attr_type, internal)
+ Set "on" for devices where this needs to be configured, "unsupported"
+ otherwise (meaning no special treatment needed). GCC does not support
+ wave32 mode.
+ 7 "CU mode" (enum hsaco_attr_type, internal)
+ Set "on" for devices that have this feature, "unsupported" otherwise
+ (meaning that CU mode is not optional on the device). GCC does not
+ support CU mode off.
+ 8 "Max ISA VGPRs" (integer, internal)
+ Define how many registers there are in the VGPR register file, for the
+ purposes of calculating maximum occupancy. Some devices have AVGPRs
+ in the same register file, some have more registers than are
+ addressable from a single kernel. Used by libgomp's plugin-gcn.c.
+ 9 "Generic Processor Version" (unsigned, external)
+ Used as version field for generic processor support. For non-generic
+ code it is 0; otherwise, between 1 and 255. Initially, it is 1 for
+ each generic device, but incremented (for a given generic device) if
+       a new device of that series requires a code change;
+ cf. EF_AMDGPU_GENERIC_VERSION_V. The version shall be the same as
+       the one generated by the llvm-mc assembler in use.
+ 10 "Architecture Family Name" (string, external)
+ Used to #define '__GFX<...>__'.
+
+ Fields marked "external", above, have values defined elsewhere (HSA, ROCM,
+ LLVM, ELF, etc.) and must have matching definitions here. Fields marked
+ "internal" are defined and used only in GCC (although some may have
+ user-visible effects) and may be refactored as needed. */
+
+/* GCN GFX9 (Vega) */
+
+GCN_DEVICE(gfx900, GFX900, 0x2c, ISA_GCN5,
+ /* XNACK default */ HSACO_ATTR_OFF,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+ /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+ /* Max ISA VGPRs */ 256,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9
+ )
+
+GCN_DEVICE(gfx906, GFX906, 0x2f, ISA_GCN5,
+ /* XNACK default */ HSACO_ATTR_OFF,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+ /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+ /* Max ISA VGPRs */ 256,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9
+ )
+
+GCN_DEVICE(gfx908, GFX908, 0x30, ISA_CDNA1,
+ /* XNACK default */ HSACO_ATTR_OFF,
+ /* SRAM_ECC default */ HSACO_ATTR_ANY,
+ /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+ /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+ /* Max ISA VGPRs */ 256,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9
+ )
+
+GCN_DEVICE(gfx90a, GFX90A, 0x3f, ISA_CDNA2,
+ /* XNACK default */ HSACO_ATTR_ANY,
+ /* SRAM_ECC default */ HSACO_ATTR_ANY,
+ /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+ /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+ /* Max ISA VGPRs */ 512,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9
+ )
+
+GCN_DEVICE(gfx90c, GFX90C, 0x32, ISA_GCN5,
+ /* XNACK default */ HSACO_ATTR_ANY,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_UNSUPPORTED,
+ /* CU mode */ HSACO_ATTR_UNSUPPORTED,
+ /* Max ISA VGPRs */ 256,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX9
+ )
+
+/* GCN GFX10.3 (RDNA 2) */
+
+GCN_DEVICE(gfx1030, GFX1030, 0x36, ISA_RDNA2,
+ /* XNACK default */ HSACO_ATTR_UNSUPPORTED,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_ON,
+ /* CU mode */ HSACO_ATTR_ON,
+ /* Max ISA VGPRs */ 512, /* 512 SIMD32 = 256 wavefrontsize64. */
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX10
+ )
+
+GCN_DEVICE(gfx1036, GFX1036, 0x45, ISA_RDNA2,
+ /* XNACK default */ HSACO_ATTR_UNSUPPORTED,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_ON,
+ /* CU mode */ HSACO_ATTR_ON,
+ /* Max ISA VGPRs */ 512, /* 512 SIMD32 = 256 wavefrontsize64. */
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX10
+ )
+
+GCN_DEVICE(gfx10-3-generic, GFX10_3_GENERIC, 0x053, ISA_RDNA2,
+ /* XNACK default */ HSACO_ATTR_UNSUPPORTED,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_ON,
+ /* CU mode */ HSACO_ATTR_ON,
+ /* Max ISA VGPRs */ 512, /* 512 SIMD32 = 256 wavefrontsize64. */
+ /* Generic code obj version */ 1,
+ /* Architecture Family */ GFX10
+ )
+
+/* GCN GFX11 (RDNA 3) */
+
+GCN_DEVICE(gfx1100, GFX1100, 0x41, ISA_RDNA3,
+ /* XNACK default */ HSACO_ATTR_UNSUPPORTED,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_ON,
+ /* CU mode */ HSACO_ATTR_ON,
+ /* Max ISA VGPRs */ 1536, /* 1536 SIMD32 = 768 wavefrontsize64. */
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX11
+ )
+
+GCN_DEVICE(gfx1103, GFX1103, 0x44, ISA_RDNA3,
+ /* XNACK default */ HSACO_ATTR_UNSUPPORTED,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_ON,
+ /* CU mode */ HSACO_ATTR_ON,
+ /* Max ISA VGPRs */ 1536,
+ /* Generic code obj version */ 0, /* non-generic */
+ /* Architecture Family */ GFX11
+ )
+
+GCN_DEVICE(gfx11-generic, GFX11_GENERIC, 0x054, ISA_RDNA3,
+ /* XNACK default */ HSACO_ATTR_UNSUPPORTED,
+ /* SRAM_ECC default */ HSACO_ATTR_UNSUPPORTED,
+ /* WAVE64 mode */ HSACO_ATTR_ON,
+ /* CU mode */ HSACO_ATTR_ON,
+ /* Max ISA VGPRs */ 1536,
+ /* Generic code obj version */ 1,
+ /* Architecture Family */ GFX11
+ )
+
+#undef GCN_DEVICE
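
The new gcn-devices.def is an "X-macro" table: each consumer defines GCN_DEVICE to pick out the fields it needs and then includes the file, which is how the gcn-opts.h and gcn.cc hunks below build the processor_type enum and the gcn_devices[] array, and how gen-opt-tables.awk produces gcn-tables.opt. The following is only a minimal stand-alone sketch of that pattern; to keep it compilable on its own it folds the .def contents into a DEVICE_TABLE macro and keeps just three of the eleven fields (the device names and ELF magic numbers are taken from the entries above, everything else is illustrative).

/* Sketch of the X-macro pattern used by gcn-devices.def.  */
#include <stdio.h>

#define DEVICE_TABLE \
  GCN_DEVICE (gfx900, GFX900, 0x2c) \
  GCN_DEVICE (gfx906, GFX906, 0x2f) \
  GCN_DEVICE (gfx90a, GFX90A, 0x3f)

/* First expansion: one enumerator per device, as gcn-opts.h does.  */
enum processor_type
{
#define GCN_DEVICE(name, NAME, ELF) PROCESSOR_ ## NAME,
  DEVICE_TABLE
#undef GCN_DEVICE
  PROCESSOR_COUNT
};

/* Second expansion: a data table indexed by the enum, as gcn.cc does.  */
static const struct
{
  const char *name;
  unsigned elf_flags;
} devices[] =
{
#define GCN_DEVICE(name, NAME, ELF) { #name, ELF },
  DEVICE_TABLE
#undef GCN_DEVICE
};

int
main (void)
{
  for (int i = 0; i < PROCESSOR_COUNT; i++)
    printf ("%s: e_flags 0x%x\n", devices[i].name, devices[i].elf_flags);
  return 0;
}

Because every expansion iterates over the same list, adding a device is a single new GCN_DEVICE line and the enum, the data table and the generated option tables stay in sync automatically.
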
diff --git a/gcc/config/gcn/gcn-hsa.h b/gcc/config/gcn/gcn-hsa.h
index 0322055..d87d2fa 100644
--- a/gcc/config/gcn/gcn-hsa.h
+++ b/gcc/config/gcn/gcn-hsa.h
@@ -75,39 +75,16 @@ extern unsigned int gcn_local_sym_hash (const char *name);
supported for gcn. */
#define GOMP_SELF_SPECS ""
-/* Explicitly set the ABI version; in principle, we could use just the
- default; however, when debugging symbols are turned on, mkoffload.cc
- writes a new AMD GPU object file and the ABI version needs to be the
- same. - LLVM <= 17 defaults to 4 while LLVM >= 18 defaults to 5.
- GCC supports LLVM >= 13.0.1 and only LLVM >= 14 supports version 5.
- Note that Fiji is only supported with LLVM <= 17 as version 3 is no longer
- supported in LLVM >= 18. */
-#define ABI_VERSION_SPEC "march=fiji:--amdhsa-code-object-version=3;" \
- "!march=*|march=*:--amdhsa-code-object-version=4"
-
-/* Note that the XNACK and SRAM-ECC settings must match those in mkoffload.cc
- as the latter creates new ELF object file when debugging is enabled and
- the ELF flags (e_flags) of that generated file must be identical to those
- generated by the compiler. */
-
-#define NO_XNACK "march=fiji:;march=gfx1030:;march=gfx1036:;march=gfx1100:;march=gfx1103:;" \
- /* These match the defaults set in gcn.cc. */ \
- "!mxnack*|mxnack=default:%{march=gfx900|march=gfx906|march=gfx908:-mattr=-xnack};"
-#define NO_SRAM_ECC "!march=*:;march=fiji:;march=gfx900:;march=gfx906:;march=gfx90c:;"
-
-/* In HSACOv4 no attribute setting means the binary supports "any" hardware
- configuration. The name of the attribute also changed. */
-#define SRAMOPT "msram-ecc=on:-mattr=+sramecc;msram-ecc=off:-mattr=-sramecc"
-#define XNACKOPT "mxnack=on:-mattr=+xnack;mxnack=off:-mattr=-xnack"
+#include "gcn-device-macros.h"
/* Use LLVM assembler and linker options. */
#define ASM_SPEC "-triple=amdgcn--amdhsa " \
"%{march=*:-mcpu=%*} " \
- "%{" ABI_VERSION_SPEC "} " \
- "%{" NO_XNACK XNACKOPT "} " \
- "%{" NO_SRAM_ECC SRAMOPT "} " \
- "%{march=gfx1030|march=gfx1036|march=gfx1100|march=gfx1103:-mattr=+wavefrontsize64} " \
- "%{march=gfx1030|march=gfx1036|march=gfx1100|march=gfx1103:-mattr=+cumode} " \
+ ABI_VERSION_OPT \
+ XNACKOPT \
+ SRAMOPT \
+ WAVE64OPT \
+ CUMODEOPT \
"-filetype=obj"
#define LINK_SPEC "--pie --export-dynamic"
#define LIB_SPEC "-lc"
diff --git a/gcc/config/gcn/gcn-opts.h b/gcc/config/gcn/gcn-opts.h
index 24e856b..0026bec 100644
--- a/gcc/config/gcn/gcn-opts.h
+++ b/gcc/config/gcn/gcn-opts.h
@@ -17,36 +17,18 @@
#ifndef GCN_OPTS_H
#define GCN_OPTS_H
-/* Which processor to generate code or schedule for. */
+/* Create constants for PROCESSOR_GFX???. */
enum processor_type
{
- PROCESSOR_FIJI, // gfx803
- PROCESSOR_VEGA10, // gfx900
- PROCESSOR_VEGA20, // gfx906
- PROCESSOR_GFX908,
- PROCESSOR_GFX90a,
- PROCESSOR_GFX90c,
- PROCESSOR_GFX1030,
- PROCESSOR_GFX1036,
- PROCESSOR_GFX1100,
- PROCESSOR_GFX1103
+#define GCN_DEVICE(name, NAME, ...) \
+ PROCESSOR_ ## NAME,
+#include "gcn-devices.def"
+ PROCESSOR_COUNT
};
-#define TARGET_FIJI (gcn_arch == PROCESSOR_FIJI)
-#define TARGET_VEGA10 (gcn_arch == PROCESSOR_VEGA10)
-#define TARGET_VEGA20 (gcn_arch == PROCESSOR_VEGA20)
-#define TARGET_GFX908 (gcn_arch == PROCESSOR_GFX908)
-#define TARGET_GFX90a (gcn_arch == PROCESSOR_GFX90a)
-#define TARGET_GFX90c (gcn_arch == PROCESSOR_GFX90c)
-#define TARGET_GFX1030 (gcn_arch == PROCESSOR_GFX1030)
-#define TARGET_GFX1036 (gcn_arch == PROCESSOR_GFX1036)
-#define TARGET_GFX1100 (gcn_arch == PROCESSOR_GFX1100)
-#define TARGET_GFX1103 (gcn_arch == PROCESSOR_GFX1103)
-
/* Set in gcn_option_override. */
extern enum gcn_isa {
ISA_UNKNOWN,
- ISA_GCN3,
ISA_GCN5,
ISA_RDNA2,
ISA_RDNA3,
@@ -54,10 +36,7 @@ extern enum gcn_isa {
ISA_CDNA2
} gcn_isa;
-#define TARGET_GCN3 (gcn_isa == ISA_GCN3)
-#define TARGET_GCN3_PLUS (gcn_isa >= ISA_GCN3)
#define TARGET_GCN5 (gcn_isa == ISA_GCN5)
-#define TARGET_GCN5_PLUS (gcn_isa >= ISA_GCN5)
#define TARGET_CDNA1 (gcn_isa == ISA_CDNA1)
#define TARGET_CDNA1_PLUS (gcn_isa >= ISA_CDNA1)
#define TARGET_CDNA2 (gcn_isa == ISA_CDNA2)
@@ -67,35 +46,30 @@ extern enum gcn_isa {
#define TARGET_RDNA3 (gcn_isa == ISA_RDNA3)
-#define TARGET_M0_LDS_LIMIT (TARGET_GCN3)
#define TARGET_PACKED_WORK_ITEMS (TARGET_CDNA2_PLUS || TARGET_RDNA3)
-#define TARGET_XNACK (flag_xnack != HSACO_ATTR_OFF)
+#define TARGET_XNACK (flag_xnack == HSACO_ATTR_ON \
+ || flag_xnack == HSACO_ATTR_ANY)
enum hsaco_attr_type
{
+ HSACO_ATTR_UNSUPPORTED,
HSACO_ATTR_OFF,
HSACO_ATTR_ON,
HSACO_ATTR_ANY,
HSACO_ATTR_DEFAULT
};
-/* There are global address instructions. */
-#define TARGET_GLOBAL_ADDRSPACE TARGET_GCN5_PLUS
/* Device has an AVGPR register file. */
#define TARGET_AVGPRS TARGET_CDNA1_PLUS
/* There are load/store instructions for AVGPRS. */
#define TARGET_AVGPR_MEMOPS TARGET_CDNA2_PLUS
/* AVGPRS may have their own register file, or be combined with VGPRS. */
#define TARGET_AVGPR_COMBINED TARGET_CDNA2_PLUS
-/* flat_load/store allows offsets. */
-#define TARGET_FLAT_OFFSETS TARGET_GCN5_PLUS
/* global_load/store has reduced offset. */
#define TARGET_11BIT_GLOBAL_OFFSET TARGET_RDNA2_PLUS
/* The work item details are all encoded into v0. */
//#define TARGET_PACKED_WORK_ITEMS TARGET_PACKED_WORK_ITEMS
-/* m0 must be initialized in order to use LDS. */
-//#define TARGET_M0_LDS_LIMIT TARGET_M0_LDS_LIMIT
/* CDNA2 load/store costs are reduced.
* TODO: what does this mean? */
#define TARGET_CDNA2_MEM_COSTS TARGET_CDNA2_PLUS
@@ -114,14 +88,12 @@ enum hsaco_attr_type
: 4)
/* This mostly affects the metadata. */
#define TARGET_ARCHITECTED_FLAT_SCRATCH TARGET_RDNA3
-/* Assembler uses s_add_co not just s_add. */
-#define TARGET_EXPLICIT_CARRY TARGET_GCN5_PLUS
-/* mulsi3 permits immediate. */
-#define TARGET_MULTIPLY_IMMEDIATE TARGET_GCN5_PLUS
/* Device has Sub-DWord Addressing instructions.  */
#define TARGET_SDWA (!TARGET_RDNA3)
/* Different devices uses different cache control instructions. */
#define TARGET_WBINVL1_CACHE (!TARGET_RDNA2_PLUS)
#define TARGET_GLn_CACHE TARGET_RDNA2_PLUS
+/* Some devices have TGSPLIT, which needs at least metadata. */
+#define TARGET_TGSPLIT TARGET_CDNA2_PLUS
#endif
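
The gcn-opts.h hunk above also rewrites TARGET_XNACK from a negative test (flag_xnack != HSACO_ATTR_OFF) to an explicit check for HSACO_ATTR_ON or HSACO_ATTR_ANY. That matters because of the new HSACO_ATTR_UNSUPPORTED enumerator: the gcn.cc hunk further down now stores HSACO_ATTR_UNSUPPORTED in flag_xnack for devices without XNACK, and the old test would have treated that value as XNACK being enabled. A small stand-alone comparison of the two predicates follows (the enum is copied from the hunk above; the helper functions are illustrative only, not GCC code).

#include <assert.h>

enum hsaco_attr_type
{
  HSACO_ATTR_UNSUPPORTED,
  HSACO_ATTR_OFF,
  HSACO_ATTR_ON,
  HSACO_ATTR_ANY,
  HSACO_ATTR_DEFAULT
};

/* The old and new TARGET_XNACK tests, written as functions for comparison.  */
static int
xnack_old (enum hsaco_attr_type flag_xnack)
{
  return flag_xnack != HSACO_ATTR_OFF;
}

static int
xnack_new (enum hsaco_attr_type flag_xnack)
{
  return flag_xnack == HSACO_ATTR_ON || flag_xnack == HSACO_ATTR_ANY;
}

int
main (void)
{
  /* The two forms agree for on, any and off...  */
  assert (xnack_old (HSACO_ATTR_ON) && xnack_new (HSACO_ATTR_ON));
  assert (xnack_old (HSACO_ATTR_ANY) && xnack_new (HSACO_ATTR_ANY));
  assert (!xnack_old (HSACO_ATTR_OFF) && !xnack_new (HSACO_ATTR_OFF));
  /* ...but only the new form stays false when XNACK is unsupported.  */
  assert (xnack_old (HSACO_ATTR_UNSUPPORTED) == 1);
  assert (xnack_new (HSACO_ATTR_UNSUPPORTED) == 0);
  return 0;
}
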
diff --git a/gcc/config/gcn/gcn-run.cc b/gcc/config/gcn/gcn-run.cc
index 2f3ed2d..64d29b3 100644
--- a/gcc/config/gcn/gcn-run.cc
+++ b/gcc/config/gcn/gcn-run.cc
@@ -426,7 +426,7 @@ load_image (const char *filename)
/* Locate the "_init_array" function, and read the kernel's properties. */
hsa_executable_symbol_t symbol;
- XHSA (hsa_fns.hsa_executable_get_symbol_fn (executable, NULL,
+ XHSA (hsa_fns.hsa_executable_get_symbol_fn (executable, NULL,
"_init_array.kd", device, 0,
&symbol),
"Find '_init_array' function");
diff --git a/gcc/config/gcn/gcn-tables.opt b/gcc/config/gcn/gcn-tables.opt
new file mode 100644
index 0000000..bb71089
--- /dev/null
+++ b/gcc/config/gcn/gcn-tables.opt
@@ -0,0 +1,58 @@
+; -*- buffer-read-only: t -*-
+; Generated automatically by gen-opt-tables.awk from gcn-devices.def.
+; Do not edit.
+
+; Copyright (C) 2024 Free Software Foundation, Inc.
+
+; This file is part of GCC.
+
+; GCC is free software; you can redistribute it and/or modify
+; it under the terms of the GNU General Public License as
+; published by the Free Software Foundation; either version 3,
+; or (at your option) any later version.
+
+; GCC is distributed in the hope that it will be useful,
+; but WITHOUT ANY WARRANTY; without even the implied warranty of
+; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+; GNU General Public License for more details.
+
+; You should have received a copy of the GNU General Public
+; License along with GCC; see the file COPYING3. If not see
+; <http://www.gnu.org/licenses/>.
+
+Enum
+Name(gpu_type) Type(enum processor_type)
+GCN GPU type to use:
+
+EnumValue
+Enum(gpu_type) String(gfx900) Value(PROCESSOR_GFX900)
+
+EnumValue
+Enum(gpu_type) String(gfx906) Value(PROCESSOR_GFX906)
+
+EnumValue
+Enum(gpu_type) String(gfx908) Value(PROCESSOR_GFX908)
+
+EnumValue
+Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90A)
+
+EnumValue
+Enum(gpu_type) String(gfx90c) Value(PROCESSOR_GFX90C)
+
+EnumValue
+Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030)
+
+EnumValue
+Enum(gpu_type) String(gfx1036) Value(PROCESSOR_GFX1036)
+
+EnumValue
+Enum(gpu_type) String(gfx10-3-generic) Value(PROCESSOR_GFX10_3_GENERIC)
+
+EnumValue
+Enum(gpu_type) String(gfx1100) Value(PROCESSOR_GFX1100)
+
+EnumValue
+Enum(gpu_type) String(gfx1103) Value(PROCESSOR_GFX1103)
+
+EnumValue
+Enum(gpu_type) String(gfx11-generic) Value(PROCESSOR_GFX11_GENERIC)
diff --git a/gcc/config/gcn/gcn-tables.opt.urls b/gcc/config/gcn/gcn-tables.opt.urls
new file mode 100644
index 0000000..b13ed90
--- /dev/null
+++ b/gcc/config/gcn/gcn-tables.opt.urls
@@ -0,0 +1,2 @@
+; Autogenerated by regenerate-opt-urls.py from gcc/config/gcn/gcn-tables.opt and generated HTML
+
diff --git a/gcc/config/gcn/gcn-tree.cc b/gcc/config/gcn/gcn-tree.cc
index 6a7485a..ad674c3 100644
--- a/gcc/config/gcn/gcn-tree.cc
+++ b/gcc/config/gcn/gcn-tree.cc
@@ -1,17 +1,17 @@
/* Copyright (C) 2017-2024 Free Software Foundation, Inc.
This file is part of GCC.
-
+
GCC is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
Software Foundation; either version 3, or (at your option) any later
version.
-
+
GCC is distributed in the hope that it will be useful, but WITHOUT ANY
WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.
-
+
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
@@ -184,7 +184,7 @@ gcn_lockless_update (location_t loc, gimple_stmt_iterator *gsi,
}
/* Helper function for gcn_reduction_update.
-
+
Insert code to lockfully update *PTR with *PTR OP VAR just before
GSI. This is necessary for types larger than 64 bits, where there
is no cmp&swap instruction to implement a lockless scheme. We use
@@ -488,7 +488,7 @@ gcn_goacc_reduction_teardown (gcall *call)
}
/* Implement TARGET_GOACC_REDUCTION.
-
+
Expand calls to the GOACC REDUCTION internal function, into a sequence of
gimple instructions. */
diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md
index b24cf9b..cb2f4a7 100644
--- a/gcc/config/gcn/gcn-valu.md
+++ b/gcc/config/gcn/gcn-valu.md
@@ -452,7 +452,7 @@
[(set (match_operand:V_1REG 0 "nonimmediate_operand")
(match_operand:V_1REG 1 "general_operand"))]
""
- {@ [cons: =0, 1; attrs: type, length, gcn_version]
+ {@ [cons: =0, 1; attrs: type, length, cdna]
[v ,vA;vop1 ,4,* ] v_mov_b32\t%0, %1
[v ,B ;vop1 ,8,* ] ^
[v ,a ;vop3p_mai,8,* ] v_accvgpr_read_b32\t%0, %1
@@ -519,7 +519,7 @@
return \"v_accvgpr_mov_b32\t%H0, %H1\;v_accvgpr_mov_b32\t%L0, %L1\";"
[(set_attr "type" "vmult,vmult,vmult,vmult")
(set_attr "length" "16,16,16,8")
- (set_attr "gcn_version" "*,*,*,cdna2")])
+ (set_attr "cdna" "*,*,*,cdna2")])
(define_insn "mov<mode>_exec"
[(set (match_operand:V_2REG 0 "nonimmediate_operand" "= v, v, v, v, m")
@@ -565,7 +565,7 @@
[(set (match_operand:V_4REG 0 "nonimmediate_operand")
(match_operand:V_4REG 1 "general_operand"))]
""
- {@ [cons: =0, 1; attrs: type, length, gcn_version]
+ {@ [cons: =0, 1; attrs: type, length, cdna]
[v ,vDB;vmult,16,* ] v_mov_b32\t%L0, %L1\; v_mov_b32\t%H0, %H1\; v_mov_b32\t%J0, %J1\; v_mov_b32\t%K0, %K1
[v ,a ;vmult,32,* ] v_accvgpr_read_b32\t%L0, %L1\; v_accvgpr_read_b32\t%H0, %H1\; v_accvgpr_read_b32\t%J0, %J1\; v_accvgpr_read_b32\t%K0, %K1
[$a,v ;vmult,32,* ] v_accvgpr_write_b32\t%L0, %L1\;v_accvgpr_write_b32\t%H0, %H1\;v_accvgpr_write_b32\t%J0, %J1\;v_accvgpr_write_b32\t%K0, %K1
@@ -662,7 +662,7 @@
UNSPEC_SGPRBASE))
(clobber (match_operand:<VnDI> 2 "register_operand"))]
"lra_in_progress || reload_completed"
- {@ [cons: =0, 1, =2; attrs: type, length, gcn_version]
+ {@ [cons: =0, 1, =2; attrs: type, length, cdna]
[v,vA,&v;vop1,4 ,* ] v_mov_b32\t%0, %1
[v,vB,&v;vop1,8 ,* ] ^
[v,m ,&v;* ,12,* ] #
@@ -689,7 +689,7 @@
#"
[(set_attr "type" "vmult,*,*,*,*")
(set_attr "length" "8,12,12,12,12")
- (set_attr "gcn_version" "*,*,*,cdna2,cdna2")])
+ (set_attr "cdna" "*,*,*,cdna2,cdna2")])
(define_insn "@mov<mode>_sgprbase"
[(set (match_operand:V_4REG 0 "nonimmediate_operand")
@@ -1156,23 +1156,16 @@
(mem:BLK (scratch))]
UNSPEC_GATHER))]
"(AS_FLAT_P (INTVAL (operands[3]))
- && ((TARGET_GCN3 && INTVAL(operands[2]) == 0)
- || ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000)))
- || (AS_GLOBAL_P (INTVAL (operands[3]))
- && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
+ && ((unsigned HOST_WIDE_INT)INTVAL(operands[2]) < 0x1000))
+ || (AS_GLOBAL_P (INTVAL (operands[3]))
+ && (((unsigned HOST_WIDE_INT)INTVAL(operands[2]) + 0x1000) < 0x2000))"
{
addr_space_t as = INTVAL (operands[3]);
const char *glc = INTVAL (operands[4]) ? " glc" : "";
static char buf[200];
if (AS_FLAT_P (as))
- {
- if (TARGET_FLAT_OFFSETS)
- sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0",
- glc);
- else
- sprintf (buf, "flat_load%%o0\t%%0, %%1%s\;s_waitcnt\t0", glc);
- }
+ sprintf (buf, "flat_load%%o0\t%%0, %%1 offset:%%2%s\;s_waitcnt\t0", glc);
else if (AS_GLOBAL_P (as))
sprintf (buf, "global_load%%o0\t%%0, %%1, off offset:%%2%s\;"
"s_waitcnt\tvmcnt(0)", glc);
@@ -1183,7 +1176,7 @@
}
[(set_attr "type" "flat")
(set_attr "length" "12")
- (set_attr "gcn_version" "*,cdna2,*,cdna2")
+ (set_attr "cdna" "*,cdna2,*,cdna2")
(set_attr "xnack" "off,off,on,on")])
(define_insn "gather<mode>_insn_1offset_ds<exec>"
@@ -1207,7 +1200,7 @@
}
[(set_attr "type" "ds")
(set_attr "length" "12")
- (set_attr "gcn_version" "*,cdna2")])
+ (set_attr "cdna" "*,cdna2")])
(define_insn "gather<mode>_insn_2offsets<exec>"
[(set (match_operand:V_MOV 0 "register_operand" "=v,a,&v,&a")
@@ -1241,7 +1234,7 @@
}
[(set_attr "type" "flat")
(set_attr "length" "12")
- (set_attr "gcn_version" "*,cdna2,*,cdna2")
+ (set_attr "cdna" "*,cdna2,*,cdna2")
(set_attr "xnack" "off,off,on,on")])
(define_expand "scatter_store<mode><vnsi>"
@@ -1290,8 +1283,7 @@
UNSPEC_SCATTER))]
"(AS_FLAT_P (INTVAL (operands[3]))
&& (INTVAL(operands[1]) == 0
- || (TARGET_FLAT_OFFSETS
- && (unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
+ || ((unsigned HOST_WIDE_INT)INTVAL(operands[1]) < 0x1000)))
|| (AS_GLOBAL_P (INTVAL (operands[3]))
&& (((unsigned HOST_WIDE_INT)INTVAL(operands[1]) + 0x1000) < 0x2000))"
{
@@ -1300,12 +1292,7 @@
static char buf[200];
if (AS_FLAT_P (as))
- {
- if (TARGET_FLAT_OFFSETS)
sprintf (buf, "flat_store%%s2\t%%0, %%2 offset:%%1%s", glc);
- else
- sprintf (buf, "flat_store%%s2\t%%0, %%2%s", glc);
- }
else if (AS_GLOBAL_P (as))
sprintf (buf, "global_store%%s2\t%%0, %%2, off offset:%%1%s", glc);
else
@@ -1315,7 +1302,7 @@
}
[(set_attr "type" "flat")
(set_attr "length" "12")
- (set_attr "gcn_version" "*,cdna2")])
+ (set_attr "cdna" "*,cdna2")])
(define_insn "scatter<mode>_insn_1offset_ds<exec_scatter>"
[(set (mem:BLK (scratch))
@@ -1338,7 +1325,7 @@
}
[(set_attr "type" "ds")
(set_attr "length" "12")
- (set_attr "gcn_version" "*,cdna2")])
+ (set_attr "cdna" "*,cdna2")])
(define_insn "scatter<mode>_insn_2offsets<exec_scatter>"
[(set (mem:BLK (scratch))
@@ -1370,7 +1357,7 @@
}
[(set_attr "type" "flat")
(set_attr "length" "12")
- (set_attr "gcn_version" "*,cdna2")])
+ (set_attr "cdna" "*,cdna2")])
;; }}}
;; {{{ Permutations
@@ -1476,7 +1463,7 @@
(clobber (reg:DI VCC_REG))]
""
{@ [cons: =0, %1, 2; attrs: type, length]
- [v,v,vSvA;vop2,4] v_add%^_u32\t%0, vcc, %2, %1
+ [v,v,vSvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1
[v,v,vSvB;vop2,8] ^
})
@@ -1489,7 +1476,7 @@
(clobber (reg:DI VCC_REG))]
""
{@ [cons: =0, 1, 2; attrs: type, length]
- [v,v,SvA;vop2,4] v_add%^_u32\t%0, vcc, %2, %1
+ [v,v,SvA;vop2,4] v_add_co_u32\t%0, vcc, %2, %1
[v,v,SvB;vop2,8] ^
})
@@ -1503,7 +1490,7 @@
(match_dup 1)))]
""
{@ [cons: =0, %1, 2, =3; attrs: type, length]
- [v,v,vSvA,cV;vop2 ,4] v_add%^_u32\t%0, %3, %2, %1
+ [v,v,vSvA,cV;vop2 ,4] v_add_co_u32\t%0, %3, %2, %1
[v,v,vSvB,cV;vop2 ,8] ^
[v,v,vSvA,Sg;vop3b,8] ^
})
@@ -1523,7 +1510,7 @@
(vec_duplicate:V_SI (match_dup 2))))]
""
{@ [cons: =0, 1, 2, =3; attrs: type, length]
- [v,SvA,v,cV;vop2 ,4] v_add%^_u32\t%0, %3, %1, %2
+ [v,SvA,v,cV;vop2 ,4] v_add_co_u32\t%0, %3, %1, %2
[v,SvB,v,cV;vop2 ,8] ^
[v,SvA,v,Sg;vop3b,8] ^
})
@@ -1560,7 +1547,7 @@
(match_dup 1))
(match_dup 1))))]
""
- "{v_addc%^_u32|v_add_co_ci_u32}\t%0, %4, %2, %1, %3"
+ "{v_addc_co_u32|v_add_co_ci_u32}\t%0, %4, %2, %1, %3"
[(set_attr "type" "vop2,vop3b")
(set_attr "length" "4,8")])
@@ -1572,8 +1559,8 @@
(clobber (reg:DI VCC_REG))]
""
"@
- v_sub%^_u32\t%0, vcc, %1, %2
- v_subrev%^_u32\t%0, vcc, %2, %1"
+ v_sub_co_u32\t%0, vcc, %1, %2
+ v_subrev_co_u32\t%0, vcc, %2, %1"
[(set_attr "type" "vop2")
(set_attr "length" "8,8")])
@@ -1587,10 +1574,10 @@
(match_dup 1)))]
""
"@
- v_sub%^_u32\t%0, %3, %1, %2
- v_sub%^_u32\t%0, %3, %1, %2
- v_subrev%^_u32\t%0, %3, %2, %1
- v_subrev%^_u32\t%0, %3, %2, %1"
+ v_sub_co_u32\t%0, %3, %1, %2
+ v_sub_co_u32\t%0, %3, %1, %2
+ v_subrev_co_u32\t%0, %3, %2, %1
+ v_subrev_co_u32\t%0, %3, %2, %1"
[(set_attr "type" "vop2,vop3b,vop2,vop3b")
(set_attr "length" "8")])
@@ -1625,10 +1612,10 @@
(match_dup 1))))]
""
"@
- {v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
- {v_subb%^_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
- {v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3
- {v_subbrev%^_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3"
+ {v_subb_co_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
+ {v_subb_co_u32|v_sub_co_ci_u32}\t%0, %4, %1, %2, %3
+ {v_subbrev_co_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3
+ {v_subbrev_co_u32|v_subrev_co_ci_u32}\t%0, %4, %2, %1, %3"
[(set_attr "type" "vop2,vop3b,vop2,vop3b")
(set_attr "length" "4,8,4,8")])
@@ -4297,10 +4284,7 @@
(match_operand:V_1REG 2 "register_operand" "v")
(match_operand:SI 3 "const_int_operand" "n")]
REDUC_UNSPEC))]
- ; GCN3 requires a carry out, GCN5 not
- "!(TARGET_GCN3 && SCALAR_INT_MODE_P (<SCALAR_MODE>mode)
- && <reduc_unspec> == UNSPEC_PLUS_DPP_SHR)
- && TARGET_DPP_FULL"
+ "TARGET_DPP_FULL"
{
return gcn_expand_dpp_shr_insn (<MODE>mode, "<reduc_insn>",
<reduc_unspec>, INTVAL (operands[3]));
@@ -4347,7 +4331,7 @@
(clobber (reg:DI VCC_REG))]
"TARGET_DPP_FULL"
{
- return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add%^_u32",
+ return gcn_expand_dpp_shr_insn (<VnSI>mode, "v_add_co_u32",
UNSPEC_PLUS_CARRY_DPP_SHR,
INTVAL (operands[3]));
}
@@ -4365,7 +4349,7 @@
(clobber (reg:DI VCC_REG))]
"TARGET_DPP_FULL"
{
- return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc%^_u32",
+ return gcn_expand_dpp_shr_insn (<MODE>mode, "v_addc_co_u32",
UNSPEC_PLUS_CARRY_IN_DPP_SHR,
INTVAL (operands[3]));
}
diff --git a/gcc/config/gcn/gcn.cc b/gcc/config/gcn/gcn.cc
index 17316a7..d078392 100644
--- a/gcc/config/gcn/gcn.cc
+++ b/gcc/config/gcn/gcn.cc
@@ -68,12 +68,12 @@ static bool ext_gcn_constants_init = 0;
/* Holds the ISA variant, derived from the command line parameters. */
-enum gcn_isa gcn_isa = ISA_GCN3; /* Default to GCN3. */
+enum gcn_isa gcn_isa = ISA_GCN5; /* Default to GCN5. */
/* Reserve this much space for LDS (for propagating variables from
worker-single mode to worker-partitioned mode), per workgroup. Global
analysis could calculate an exact bound, but we don't do that yet.
-
+
We want to permit full occupancy, so size accordingly. */
/* Use this as a default, but allow it to grow if the user requests a large
@@ -98,6 +98,15 @@ static hash_map<tree, int> lds_allocs;
#define MAX_NORMAL_VGPR_COUNT 24
#define MAX_NORMAL_AVGPR_COUNT 24
+/* Import all the data from gcn-devices.def.
+ The PROCESSOR_GFXnnn should be indices for this table. */
+const struct gcn_device_def gcn_devices[] = {
+#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, GEN_VER,ARCH_FAM) \
+ {PROCESSOR_ ## NAME, #name, #NAME, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, \
+ GEN_VER, #ARCH_FAM},
+#include "gcn-devices.def"
+};
+
/* }}} */
/* {{{ Initialization and options. */
@@ -118,7 +127,7 @@ gcn_init_machine_status (void)
}
/* Implement TARGET_OPTION_OVERRIDE.
-
+
Override option settings where defaults are variable, or we have specific
needs to consider. */
@@ -133,18 +142,8 @@ gcn_option_override (void)
if (!flag_pic)
flag_pic = flag_pie;
- gcn_isa = (gcn_arch == PROCESSOR_FIJI ? ISA_GCN3
- : gcn_arch == PROCESSOR_VEGA10 ? ISA_GCN5
- : gcn_arch == PROCESSOR_VEGA20 ? ISA_GCN5
- : gcn_arch == PROCESSOR_GFX908 ? ISA_CDNA1
- : gcn_arch == PROCESSOR_GFX90a ? ISA_CDNA2
- : gcn_arch == PROCESSOR_GFX90c ? ISA_GCN5
- : gcn_arch == PROCESSOR_GFX1030 ? ISA_RDNA2
- : gcn_arch == PROCESSOR_GFX1036 ? ISA_RDNA2
- : gcn_arch == PROCESSOR_GFX1100 ? ISA_RDNA3
- : gcn_arch == PROCESSOR_GFX1103 ? ISA_RDNA3
- : ISA_UNKNOWN);
- gcc_assert (gcn_isa != ISA_UNKNOWN);
+ gcc_assert (gcn_arch >= 0 && gcn_arch < PROCESSOR_COUNT);
+ gcn_isa = gcn_devices[gcn_arch].isa;
/* Reserve 1Kb (somewhat arbitrarily) of LDS space for reduction results and
worker broadcasts. */
@@ -164,23 +163,14 @@ gcn_option_override (void)
acc_lds_size = 32768;
}
- /* gfx803 "Fiji", gfx1030 and gfx1100 do not support XNACK. */
- if (gcn_arch == PROCESSOR_FIJI
- || gcn_arch == PROCESSOR_GFX1030
- || gcn_arch == PROCESSOR_GFX1036
- || gcn_arch == PROCESSOR_GFX1100
- || gcn_arch == PROCESSOR_GFX1103)
+ /* gfx1030 and gfx1100 do not support XNACK. */
+ if (gcn_devices[gcn_arch].xnack_default == HSACO_ATTR_UNSUPPORTED)
{
if (flag_xnack == HSACO_ATTR_ON)
error ("%<-mxnack=on%> is incompatible with %<-march=%s%>",
- (gcn_arch == PROCESSOR_FIJI ? "fiji"
- : gcn_arch == PROCESSOR_GFX1030 ? "gfx1030"
- : gcn_arch == PROCESSOR_GFX1036 ? "gfx1036"
- : gcn_arch == PROCESSOR_GFX1100 ? "gfx1100"
- : gcn_arch == PROCESSOR_GFX1103 ? "gfx1103"
- : NULL));
- /* Allow HSACO_ATTR_ANY silently because that's the default. */
- flag_xnack = HSACO_ATTR_OFF;
+ gcn_devices[gcn_arch].name);
+ /* Allow HSACO_ATTR_ANY silently. */
+ flag_xnack = HSACO_ATTR_UNSUPPORTED;
}
/* There's no need for XNACK on devices without USM, and there are register
@@ -188,24 +178,10 @@ gcn_option_override (void)
available.
FIXME: can the regalloc mean the default can be really "any"? */
if (flag_xnack == HSACO_ATTR_DEFAULT)
- switch (gcn_arch)
- {
- case PROCESSOR_FIJI:
- case PROCESSOR_VEGA10:
- case PROCESSOR_VEGA20:
- case PROCESSOR_GFX908:
- flag_xnack = HSACO_ATTR_OFF;
- break;
- case PROCESSOR_GFX90a:
- case PROCESSOR_GFX90c:
- flag_xnack = HSACO_ATTR_ANY;
- break;
- default:
- gcc_unreachable ();
- }
+ flag_xnack = gcn_devices[gcn_arch].xnack_default;
if (flag_sram_ecc == HSACO_ATTR_DEFAULT)
- flag_sram_ecc = HSACO_ATTR_ANY;
+ flag_sram_ecc = gcn_devices[gcn_arch].sramecc_default;
}
/* }}} */
@@ -270,7 +246,7 @@ static const long default_requested_args
/* Extract parameter settings from __attribute__((amdgpu_hsa_kernel ())).
This function also sets the default values for some arguments.
-
+
Return true on success, with ARGS populated. */
static bool
@@ -367,7 +343,7 @@ gcn_parse_amdgpu_hsa_kernel_attribute (struct gcn_kernel_args *args,
}
/* Referenced by TARGET_ATTRIBUTE_TABLE.
-
+
Validates target specific attributes. */
static tree
@@ -397,7 +373,7 @@ gcn_handle_amdgpu_hsa_kernel_attribute (tree *node, tree name,
}
/* Implement TARGET_ATTRIBUTE_TABLE.
-
+
Create target-specific __attribute__ types. */
TARGET_GNU_ATTRIBUTES (gcn_attribute_table, {
@@ -515,7 +491,7 @@ VnMODE (int n, machine_mode mode)
}
/* Implement TARGET_CLASS_MAX_NREGS.
-
+
Return the number of hard registers needed to hold a value of MODE in
a register of class RCLASS. */
@@ -550,7 +526,7 @@ gcn_class_max_nregs (reg_class_t rclass, machine_mode mode)
}
/* Implement TARGET_HARD_REGNO_NREGS.
-
+
Return the number of hard registers needed to hold a value of MODE in
REGNO. */
@@ -561,7 +537,7 @@ gcn_hard_regno_nregs (unsigned int regno, machine_mode mode)
}
/* Implement TARGET_HARD_REGNO_MODE_OK.
-
+
Return true if REGNO can hold value in MODE. */
bool
@@ -642,7 +618,7 @@ gcn_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
}
/* Implement REGNO_REG_CLASS via gcn.h.
-
+
Return smallest class containing REGNO. */
enum reg_class
@@ -677,7 +653,7 @@ gcn_regno_reg_class (int regno)
}
/* Implement TARGET_CAN_CHANGE_MODE_CLASS.
-
+
GCC assumes that lowpart contains first part of value as stored in memory.
This is not the case for vector registers. */
@@ -709,7 +685,7 @@ gcn_can_change_mode_class (machine_mode from, machine_mode to,
}
/* Implement TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P.
-
+
When this hook returns true for MODE, the compiler allows
registers explicitly used in the rtl to be used as spill registers
but prevents the compiler from extending the lifetime of these
@@ -723,7 +699,7 @@ gcn_small_register_classes_for_mode_p (machine_mode mode)
}
/* Implement TARGET_CLASS_LIKELY_SPILLED_P.
-
+
Returns true if pseudos that have been assigned to registers of class RCLASS
would likely be spilled because registers of RCLASS are needed for spill
registers. */
@@ -736,7 +712,7 @@ gcn_class_likely_spilled_p (reg_class_t rclass)
}
/* Implement TARGET_MODES_TIEABLE_P.
-
+
Returns true if a value of MODE1 is accessible in MODE2 without
copying. */
@@ -758,7 +734,7 @@ gcn_modes_tieable_p (machine_mode mode1, machine_mode mode2)
}
/* Implement TARGET_TRULY_NOOP_TRUNCATION.
-
+
   Returns true if it is safe to "convert" a value of INPREC bits to one of
OUTPREC bits (where OUTPREC is smaller than INPREC) by merely operating on
it as if it had only OUTPREC bits. */
@@ -845,7 +821,7 @@ gcn_can_split_p (machine_mode, rtx op)
}
/* Implement TARGET_SPILL_CLASS.
-
+
Return class of registers which could be used for pseudo of MODE
and of class RCLASS for spilling instead of memory. Return NO_REGS
if it is not possible or non-profitable. */
@@ -861,7 +837,7 @@ gcn_spill_class (reg_class_t c, machine_mode /*mode */ )
}
/* Implement TARGET_IRA_CHANGE_PSEUDO_ALLOCNO_CLASS.
-
+
Change allocno class for given pseudo from allocno and best class
calculated by IRA. */
@@ -1156,7 +1132,7 @@ gcn_constant64_p (rtx x)
}
/* Implement TARGET_LEGITIMATE_CONSTANT_P.
-
+
Returns true if X is a legitimate constant for a MODE immediate operand. */
bool
@@ -1249,7 +1225,7 @@ gcn_gen_undef (machine_mode mode)
GEN_VNM - create accessor functions for all sizes of all modes
GEN_VN_NOEXEC - for insns without "_exec" variants
GEN_VNM_NOEXEC - likewise
-
+
E.g. add<mode>3
GEN_VNM (add, 3, A(rtx dest, rtx s1, rtx s2), A(dest, s1, s2)
@@ -1532,8 +1508,7 @@ gcn_flat_address_p (rtx x, machine_mode mode)
if (!vec_mode && gcn_vec_address_register_p (x, DImode, false))
return true;
- if (TARGET_FLAT_OFFSETS
- && GET_CODE (x) == PLUS
+ if (GET_CODE (x) == PLUS
&& gcn_vec_address_register_p (XEXP (x, 0), DImode, false)
&& CONST_INT_P (XEXP (x, 1)))
return true;
@@ -1631,7 +1606,7 @@ gcn_global_address_p (rtx addr)
}
/* Implement TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P.
-
+
Recognizes RTL expressions that are valid memory addresses for an
instruction. The MODE argument is the machine mode for the MEM
expression that wants to use this address.
@@ -1644,10 +1619,6 @@ static bool
gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
addr_space_t as, code_helper = ERROR_MARK)
{
- /* All vector instructions need to work on addresses in registers. */
- if (!TARGET_FLAT_OFFSETS && (vgpr_vector_mode_p (mode) && !REG_P (x)))
- return false;
-
if (AS_SCALAR_FLAT_P (as))
{
if (mode == QImode || mode == HImode)
@@ -1693,15 +1664,13 @@ gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
return gcn_address_register_p (x, SImode, strict);
else if (AS_FLAT_P (as) || AS_FLAT_SCRATCH_P (as))
{
- if (!TARGET_FLAT_OFFSETS || GET_CODE (x) == REG)
+ if (GET_CODE (x) == REG)
return ((GET_MODE_CLASS (mode) == MODE_VECTOR_INT
|| GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
? gcn_address_register_p (x, DImode, strict)
: gcn_vec_address_register_p (x, DImode, strict));
else
{
- gcc_assert (TARGET_FLAT_OFFSETS);
-
if (GET_CODE (x) == PLUS)
{
rtx x1 = XEXP (x, 1);
@@ -1725,8 +1694,6 @@ gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
}
else if (AS_GLOBAL_P (as))
{
- gcc_assert (TARGET_FLAT_OFFSETS);
-
if (GET_CODE (x) == REG)
return (gcn_address_register_p (x, DImode, strict)
|| (!VECTOR_MODE_P (mode)
@@ -1819,7 +1786,7 @@ gcn_addr_space_legitimate_address_p (machine_mode mode, rtx x, bool strict,
}
/* Implement TARGET_ADDR_SPACE_POINTER_MODE.
-
+
Return the appropriate mode for a named address pointer. */
static scalar_int_mode
@@ -1842,7 +1809,7 @@ gcn_addr_space_pointer_mode (addr_space_t addrspace)
}
/* Implement TARGET_ADDR_SPACE_ADDRESS_MODE.
-
+
Return the appropriate mode for a named address space address. */
static scalar_int_mode
@@ -1852,7 +1819,7 @@ gcn_addr_space_address_mode (addr_space_t addrspace)
}
/* Implement TARGET_ADDR_SPACE_SUBSET_P.
-
+
Determine if one named address space is a subset of another. */
static bool
@@ -1930,7 +1897,7 @@ gcn_addr_space_debug (addr_space_t as)
/* Implement REGNO_MODE_CODE_OK_FOR_BASE_P via gcn.h
-
+
   Return true if REGNO is OK for memory addressing. */
bool
@@ -1963,7 +1930,7 @@ gcn_regno_mode_code_ok_for_base_p (int regno,
}
/* Implement MODE_CODE_BASE_REG_CLASS via gcn.h.
-
+
Return a suitable register class for memory addressing. */
reg_class
@@ -1994,7 +1961,7 @@ gcn_mode_code_base_reg_class (machine_mode mode, addr_space_t as, int oc,
}
/* Implement REGNO_OK_FOR_INDEX_P via gcn.h.
-
+
Return true if REGNO is OK for index of memory addressing. */
bool
@@ -2203,7 +2170,7 @@ gcn_addr_space_legitimize_address (rtx x, rtx old, machine_mode mode,
case ADDR_SPACE_FLAT:
case ADDR_SPACE_FLAT_SCRATCH:
case ADDR_SPACE_GLOBAL:
- return !TARGET_FLAT_OFFSETS ? force_reg (DImode, x) : x;
+ return x;
case ADDR_SPACE_LDS:
case ADDR_SPACE_GDS:
      /* FIXME: LDS supports offsets, handle them!  */
@@ -2241,13 +2208,6 @@ gcn_expand_scalar_to_vector_address (machine_mode mode, rtx exec, rtx mem,
rtx mem_base = XEXP (mem, 0);
rtx mem_index = NULL_RTX;
- if (!TARGET_FLAT_OFFSETS)
- {
- /* gcn_addr_space_legitimize_address should have put the address in a
- register. If not, it is too late to do anything about it. */
- gcc_assert (REG_P (mem_base));
- }
-
if (GET_CODE (mem_base) == PLUS)
{
mem_index = XEXP (mem_base, 1);
@@ -2669,7 +2629,7 @@ gcn_valid_move_p (machine_mode mode, rtx dest, rtx src)
/* {{{ Functions and ABI. */
/* Implement TARGET_FUNCTION_VALUE.
-
+
Define how to find the value returned by a function.
The register location is always the same, but the mode depends on
VALTYPE. */
@@ -2688,7 +2648,7 @@ gcn_function_value (const_tree valtype, const_tree, bool)
}
/* Implement TARGET_FUNCTION_VALUE_REGNO_P.
-
+
Return true if N is a possible register number for the function return
value. */
@@ -2734,7 +2694,7 @@ gcn_strict_argument_naming (cumulative_args_t cum_v)
}
/* Implement TARGET_PRETEND_OUTGOING_VARARGS_NAMED.
-
+
See comment on gcn_strict_argument_naming. */
static bool
@@ -2744,7 +2704,7 @@ gcn_pretend_outgoing_varargs_named (cumulative_args_t cum_v)
}
/* Implement TARGET_FUNCTION_ARG.
-
+
Return an RTX indicating whether a function argument is passed in a register
and if so, which register. */
@@ -2806,7 +2766,7 @@ gcn_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
}
/* Implement TARGET_FUNCTION_ARG_ADVANCE.
-
+
Updates the summarizer variable pointed to by CUM_V to advance past an
argument in the argument list. */
@@ -2844,7 +2804,7 @@ gcn_function_arg_advance (cumulative_args_t cum_v,
}
/* Implement TARGET_ARG_PARTIAL_BYTES.
-
+
Returns the number of bytes at the beginning of an argument that must be put
in registers. The value must be zero for arguments that are passed entirely
in registers or that are entirely pushed on the stack. */
@@ -2896,7 +2856,7 @@ gcn_detect_incoming_pointer_arg (tree fndecl)
}
/* Implement INIT_CUMULATIVE_ARGS, via gcn.h.
-
+
Initialize a variable CUM of type CUMULATIVE_ARGS for a call to a function
whose data type is FNTYPE. For a library call, FNTYPE is 0. */
@@ -2973,7 +2933,7 @@ gcn_return_in_memory (const_tree type, const_tree ARG_UNUSED (fntype))
}
/* Implement TARGET_PROMOTE_FUNCTION_MODE.
-
+
Return the mode to use for outgoing function arguments. */
machine_mode
@@ -2989,7 +2949,7 @@ gcn_promote_function_mode (const_tree ARG_UNUSED (type), machine_mode mode,
}
/* Implement TARGET_GIMPLIFY_VA_ARG_EXPR.
-
+
Derived from hppa_gimplify_va_arg_expr. The generic routine doesn't handle
ARGS_GROW_DOWNWARDS. */
@@ -3050,27 +3010,7 @@ gcn_omp_device_kind_arch_isa (enum omp_device_kind_arch_isa trait,
case omp_device_arch:
return strcmp (name, "amdgcn") == 0 || strcmp (name, "gcn") == 0;
case omp_device_isa:
- if (strcmp (name, "fiji") == 0 || strcmp (name, "gfx803") == 0)
- return gcn_arch == PROCESSOR_FIJI;
- if (strcmp (name, "gfx900") == 0)
- return gcn_arch == PROCESSOR_VEGA10;
- if (strcmp (name, "gfx906") == 0)
- return gcn_arch == PROCESSOR_VEGA20;
- if (strcmp (name, "gfx908") == 0)
- return gcn_arch == PROCESSOR_GFX908;
- if (strcmp (name, "gfx90a") == 0)
- return gcn_arch == PROCESSOR_GFX90a;
- if (strcmp (name, "gfx90c") == 0)
- return gcn_arch == PROCESSOR_GFX90c;
- if (strcmp (name, "gfx1030") == 0)
- return gcn_arch == PROCESSOR_GFX1030;
- if (strcmp (name, "gfx1036") == 0)
- return gcn_arch == PROCESSOR_GFX1036;
- if (strcmp (name, "gfx1100") == 0)
- return gcn_arch == PROCESSOR_GFX1100;
- if (strcmp (name, "gfx1103") == 0)
- return gcn_arch == PROCESSOR_GFX1103;
- return 0;
+ return strcmp (name, gcn_devices[gcn_arch].name) == 0;
default:
gcc_unreachable ();
}
@@ -3114,7 +3054,7 @@ gcn_compute_frame_offsets (void)
/* Insert code into the prologue or epilogue to store or load any
callee-save register to/from the stack.
-
+
Helper function for gcn_expand_prologue and gcn_expand_epilogue. */
static void
@@ -3562,17 +3502,6 @@ gcn_expand_prologue ()
/* Ensure that the scheduler doesn't do anything unexpected. */
emit_insn (gen_blockage ());
- if (TARGET_M0_LDS_LIMIT)
- {
- /* m0 is initialized for the usual LDS DS and FLAT memory case.
- The low-part is the address of the topmost addressable byte, which is
- size-1. The high-part is an offset and should be zero. */
- emit_move_insn (gen_rtx_REG (SImode, M0_REG),
- gen_int_mode (LDS_SIZE, SImode));
-
- emit_insn (gen_prologue_use (gen_rtx_REG (SImode, M0_REG)));
- }
-
if (cfun && cfun->machine && !cfun->machine->normal_function && flag_openmp)
{
/* OpenMP kernels have an implicit call to gomp_gcn_enter_kernel. */
@@ -3682,10 +3611,10 @@ gcn_frame_pointer_rqd (void)
}
/* Implement TARGET_CAN_ELIMINATE.
-
+
Return true if the compiler is allowed to try to replace register number
FROM_REG with register number TO_REG.
-
+
FIXME: is the default "true" not enough? Should this be a negative set? */
bool
@@ -3696,7 +3625,7 @@ gcn_can_eliminate_p (int /*from_reg */ , int to_reg)
}
/* Implement INITIAL_ELIMINATION_OFFSET.
-
+
Returns the initial difference between the specified pair of registers, in
terms of stack position. */
@@ -3763,7 +3692,7 @@ gcn_hard_regno_rename_ok (unsigned int from_reg, unsigned int to_reg)
}
/* Implement HARD_REGNO_CALLER_SAVE_MODE.
-
+
Which mode is required for saving NREGS of a pseudo-register in
call-clobbered hard register REGNO. */
@@ -3872,7 +3801,7 @@ gcn_expand_divmod_libfunc (rtx libfunc, machine_mode mode, rtx op0, rtx op1,
/* {{{ Miscellaneous. */
/* Implement TARGET_CANNOT_COPY_INSN_P.
-
+
Return true if INSN must not be duplicated. */
static bool
@@ -3964,7 +3893,7 @@ gcn_emutls_var_init (tree, tree decl, tree)
/* {{{ Costs. */
/* Implement TARGET_RTX_COSTS.
-
+
Compute a (partial) cost for rtx X. Return true if the complete
cost has been computed, and false if subexpressions should be
scanned. In either case, *TOTAL contains the cost result. */
@@ -4001,7 +3930,7 @@ gcn_rtx_costs (rtx x, machine_mode, int, int, int *total, bool)
}
/* Implement TARGET_MEMORY_MOVE_COST.
-
+
Return the cost of moving data of mode M between a
register and memory. A value of 2 is the default; this cost is
relative to those in `REGISTER_MOVE_COST'.
@@ -4063,7 +3992,7 @@ gcn_memory_move_cost (machine_mode mode, reg_class_t regclass, bool in)
}
/* Implement TARGET_REGISTER_MOVE_COST.
-
+
Return the cost of moving data from a register in class CLASS1 to
one in class CLASS2. Base value is 2. */
@@ -4186,7 +4115,7 @@ struct gcn_builtin_description gcn_builtins[] = {
static GTY(()) tree gcn_builtin_decls[GCN_BUILTIN_MAX];
/* Implement TARGET_BUILTIN_DECL.
-
+
Return the GCN builtin for CODE. */
tree
@@ -4238,7 +4167,7 @@ gcn_init_builtin_types (void)
}
/* Implement TARGET_INIT_BUILTINS.
-
+
Set up all builtin functions for this target. */
static void
@@ -4526,7 +4455,7 @@ gcn_init_libfuncs (void)
/* Expand the CMP_SWAP GCN builtins. We have our own versions that do
not require taking the address of any object, other than the memory
cell being operated on.
-
+
Helper function for gcn_expand_builtin_1. */
static rtx
@@ -5030,7 +4959,7 @@ gcn_expand_builtin_binop (tree exp, rtx target, rtx /*subtarget */ ,
}
/* Implement TARGET_EXPAND_BUILTIN.
-
+
Expand an expression EXP that calls a built-in function, with result going
to TARGET if that's convenient (and in mode MODE if that's convenient).
SUBTARGET may be used as the target for computing one of EXP's operands.
@@ -5070,7 +4999,7 @@ gcn_vectorize_get_mask_mode (machine_mode)
/* Return an RTX that references a vector with the i-th lane containing
PERM[i]*4.
-
+
Helper function for gcn_vectorize_vec_perm_const. */
static rtx
@@ -5107,9 +5036,9 @@ gcn_make_vec_perm_address (unsigned int *perm, int nelt)
}
/* Implement TARGET_VECTORIZE_VEC_PERM_CONST.
-
+
Return true if permutation with SEL is possible.
-
+
If DST/SRC0/SRC1 are non-null, emit the instructions to perform the
permutations. */
@@ -5200,7 +5129,7 @@ gcn_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
}
/* Implements TARGET_VECTOR_MODE_SUPPORTED_P.
-
+
Return nonzero if vector MODE is supported with at least move
instructions. */
@@ -5597,8 +5526,7 @@ gcn_expand_reduc_scalar (machine_mode mode, rtx src, int unspec)
|| unspec == UNSPEC_UMAX_DPP_SHR);
bool use_plus_carry = unspec == UNSPEC_PLUS_DPP_SHR
&& GET_MODE_CLASS (mode) == MODE_VECTOR_INT
- /* FIXME: why GCN3? */
- && (TARGET_GCN3 || scalar_mode == DImode);
+ && scalar_mode == DImode;
if (use_plus_carry)
unspec = UNSPEC_PLUS_CARRY_DPP_SHR;
@@ -6195,7 +6123,7 @@ gcn_md_reorg (void)
CLEAR_REG_SET (&live);
/* "Manually Inserted Wait States (NOPs)."
-
+
GCN hardware detects most kinds of register dependencies, but there
are some exceptions documented in the ISA manual. This pass
detects the missed cases, and inserts the documented number of NOPs
@@ -6506,7 +6434,7 @@ gcn_fork_join (gcall *call, const int dims[], bool is_fork)
/* Implement ???????
FIXME make this a real hook.
-
+
Adjust FNDECL such that options inherited from the host compiler
are made appropriate for the accelerator compiler. */
@@ -6569,7 +6497,7 @@ gcn_shared_mem_layout (unsigned HOST_WIDE_INT *lo,
/* {{{ ASM Output. */
/* Implement TARGET_ASM_FILE_START.
-
+
Print assembler file header text. */
static void
@@ -6579,68 +6507,20 @@ output_file_start (void)
configuration. */
const char *xnack = (flag_xnack == HSACO_ATTR_ON ? ":xnack+"
: flag_xnack == HSACO_ATTR_OFF ? ":xnack-"
- : "");
+ : "" /* Unsupported or "any". */);
const char *sram_ecc = (flag_sram_ecc == HSACO_ATTR_ON ? ":sramecc+"
: flag_sram_ecc == HSACO_ATTR_OFF ? ":sramecc-"
- : "");
-
- const char *cpu;
- switch (gcn_arch)
- {
- case PROCESSOR_FIJI:
- cpu = "gfx803";
- xnack = "";
- sram_ecc = "";
- break;
- case PROCESSOR_VEGA10:
- cpu = "gfx900";
- sram_ecc = "";
- break;
- case PROCESSOR_VEGA20:
- cpu = "gfx906";
- sram_ecc = "";
- break;
- case PROCESSOR_GFX908:
- cpu = "gfx908";
- break;
- case PROCESSOR_GFX90a:
- cpu = "gfx90a";
- break;
- case PROCESSOR_GFX90c:
- cpu = "gfx90c";
- sram_ecc = "";
- break;
- case PROCESSOR_GFX1030:
- cpu = "gfx1030";
- xnack = "";
- sram_ecc = "";
- break;
- case PROCESSOR_GFX1036:
- cpu = "gfx1036";
- xnack = "";
- sram_ecc = "";
- break;
- case PROCESSOR_GFX1100:
- cpu = "gfx1100";
- xnack = "";
- sram_ecc = "";
- break;
- case PROCESSOR_GFX1103:
- cpu = "gfx1103";
- xnack = "";
- sram_ecc = "";
- break;
- default: gcc_unreachable ();
- }
+ : "" /* Unsupported or "any". */);
+ const char *cpu = gcn_devices[gcn_arch].name;
fprintf(asm_out_file, "\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n",
cpu, sram_ecc, xnack);
}
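
For clarity, the target id string now assembled from the device table has the shape of the device name followed by optional feature suffixes. A hedged sketch with illustrative flag settings (in the real code the values come from flag_sram_ecc, flag_xnack and gcn_devices[gcn_arch].name):

    #include <stdio.h>

    int main (void)
    {
      /* Illustrative: what output_file_start would pick for a gfx90a
         build with SRAM-ECC forced on and XNACK forced off.  */
      const char *cpu = "gfx90a";
      const char *sram_ecc = ":sramecc+";
      const char *xnack = ":xnack-";

      printf ("\t.amdgcn_target \"amdgcn-unknown-amdhsa--%s%s%s\"\n",
              cpu, sram_ecc, xnack);  /* ...--gfx90a:sramecc+:xnack-  */
      return 0;
    }
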
/* Implement ASM_DECLARE_FUNCTION_NAME via gcn-hsa.h.
-
+
Print the initial definition of a function name.
-
+
For GCN kernel entry points this includes all the HSA meta-data, special
alignment constraints that don't apply to regular functions, and magic
comments that pass information to mkoffload. */
@@ -6778,11 +6658,13 @@ gcn_hsa_declare_function_name (FILE *file, const char *name,
if (!TARGET_ARCHITECTED_FLAT_SCRATCH)
fprintf (file,
"\t .amdhsa_reserve_flat_scratch\t0\n");
- if (gcn_arch == PROCESSOR_GFX90a)
+ if (TARGET_AVGPR_COMBINED)
fprintf (file,
- "\t .amdhsa_accum_offset\t%i\n"
- "\t .amdhsa_tg_split\t0\n",
+ "\t .amdhsa_accum_offset\t%i\n",
vgpr); /* The AGPRs come after the VGPRs. */
+ if (TARGET_TGSPLIT)
+ fprintf (file,
+ "\t .amdhsa_tg_split\t0\n");
fputs ("\t.end_amdhsa_kernel\n", file);
#if 1
@@ -6813,7 +6695,7 @@ gcn_hsa_declare_function_name (FILE *file, const char *name,
(TARGET_WAVE64_COMPAT
? " ; wavefrontsize64 counts double on SIMD32"
: ""));
- if (gcn_arch == PROCESSOR_GFX90a || gcn_arch == PROCESSOR_GFX908)
+ if (TARGET_AVGPRS)
fprintf (file, " .agpr_count: %i\n", avgpr);
fputs (" .end_amdgpu_metadata\n", file);
#endif
@@ -6855,7 +6737,7 @@ gcn_asm_select_section (tree exp, int reloc, unsigned HOST_WIDE_INT align)
}
/* Implement TARGET_ASM_FUNCTION_PROLOGUE.
-
+
Emits custom text into the assembler file at the head of each function. */
static void
@@ -7019,7 +6901,7 @@ gcn_asm_output_symbol_ref (FILE *file, rtx x)
}
/* Implement TARGET_CONSTANT_ALIGNMENT.
-
+
Returns the alignment in bits of a constant that is being placed in memory.
CONSTANT is the constant and BASIC_ALIGN is the alignment that the object
would ordinarily have. */
@@ -7070,15 +6952,10 @@ print_operand_address (FILE *file, rtx mem)
if (GET_CODE (addr) == REG)
print_reg (file, addr);
else
- {
- gcc_assert (TARGET_FLAT_OFFSETS);
- print_reg (file, XEXP (addr, 0));
- }
+ print_reg (file, XEXP (addr, 0));
}
else if (AS_GLOBAL_P (as))
{
- gcc_assert (TARGET_GLOBAL_ADDRSPACE);
-
rtx base = addr;
rtx vgpr_offset = NULL_RTX;
@@ -7190,7 +7067,6 @@ print_operand_address (FILE *file, rtx mem)
E - print conditional code for v_cmp (eq_u64/ne_u64...)
A - print address in formatting suitable for given address space.
O - print offset:n for data share operations.
- ^ - print "_co" suffix for GCN5 mnemonics
g - print "glc", if appropriate for given MEM
L - print low-part of a multi-reg value
H - print second part of a multi-reg value (high-part of 2-reg value)
@@ -7439,8 +7315,6 @@ print_operand (FILE *file, rtx x, int code)
rtx x0 = XEXP (x, 0);
if (AS_GLOBAL_P (MEM_ADDR_SPACE (x)))
{
- gcc_assert (TARGET_GLOBAL_ADDRSPACE);
-
fprintf (file, ", ");
rtx base = x0;
@@ -7809,10 +7683,6 @@ print_operand (FILE *file, rtx x, int code)
else
output_addr_const (file, x);
return;
- case '^':
- if (TARGET_EXPLICIT_CARRY)
- fputs ("_co", file);
- return;
case 'g':
gcc_assert (xcode == MEM);
if (MEM_VOLATILE_P (x))
@@ -7825,7 +7695,7 @@ print_operand (FILE *file, rtx x, int code)
}
/* Implement DEBUGGER_REGNO macro.
-
+
Return the DWARF register number that corresponds to the GCC internal
REGNO. */
@@ -7864,7 +7734,7 @@ gcn_dwarf_register_number (unsigned int regno)
}
/* Implement TARGET_DWARF_REGISTER_SPAN.
-
+
DImode and Vector DImode require additional registers. */
static rtx
diff --git a/gcc/config/gcn/gcn.h b/gcc/config/gcn/gcn.h
index bd2afa6..30a144b 100644
--- a/gcc/config/gcn/gcn.h
+++ b/gcc/config/gcn/gcn.h
@@ -16,13 +16,28 @@
#include "config/gcn/gcn-opts.h"
+extern const struct gcn_device_def {
+ enum processor_type id;
+ const char *name;
+ const char *NAME;
+ enum gcn_isa isa;
+
+ /* Features. */
+ enum hsaco_attr_type xnack_default;
+ enum hsaco_attr_type sramecc_default;
+ enum hsaco_attr_type wave64_default;
+ enum hsaco_attr_type cumode_default;
+ int max_isa_vgprs;
+ unsigned generic_version;
+ const char *arch_family;
+} gcn_devices[];
+
#define TARGET_CPU_CPP_BUILTINS() \
do \
{ \
+ builtin_define ("__AMDGPU__"); \
builtin_define ("__AMDGCN__"); \
- if (TARGET_GCN3) \
- builtin_define ("__GCN3__"); \
- else if (TARGET_GCN5) \
+ if (TARGET_GCN5) \
builtin_define ("__GCN5__"); \
else if (TARGET_CDNA1) \
builtin_define ("__CDNA1__"); \
@@ -34,31 +49,27 @@
builtin_define ("__RDNA3__"); \
else \
gcc_unreachable (); \
- if (TARGET_FIJI) \
- { \
- builtin_define ("__fiji__"); \
- builtin_define ("__gfx803__"); \
- } \
- else if (TARGET_VEGA10) \
- builtin_define ("__gfx900__"); \
- else if (TARGET_VEGA20) \
- builtin_define ("__gfx906__"); \
- else if (TARGET_GFX908) \
- builtin_define ("__gfx908__"); \
- else if (TARGET_GFX90a) \
- builtin_define ("__gfx90a__"); \
- else if (TARGET_GFX90c) \
- builtin_define ("__gfx90c__"); \
- else if (TARGET_GFX1030) \
- builtin_define ("__gfx1030__"); \
- else if (TARGET_GFX1036) \
- builtin_define ("__gfx1036__"); \
- else if (TARGET_GFX1100) \
- builtin_define ("__gfx1100__"); \
- else if (TARGET_GFX1103) \
- builtin_define ("__gfx1103__"); \
- else \
- gcc_unreachable (); \
+ char *name = (char *)xmalloc (strlen (gcn_devices[gcn_arch].name) + 5); \
+ sprintf (name, "__%s__", gcn_devices[gcn_arch].name); \
+ char *p; \
+ if (gcn_devices[gcn_arch].generic_version) \
+ while ((p = strchr(name, '-'))) \
+ *p = '_'; \
+ builtin_define (name); \
+ name = (char *)xmalloc (strlen (gcn_devices[gcn_arch].arch_family) + 5); \
+ sprintf (name, "__%s__", gcn_devices[gcn_arch].arch_family); \
+ builtin_define (name); \
+ name = (char *)xmalloc (strlen ("__amdgcn_target_id__") + \
+ strlen (gcn_devices[gcn_arch].name) + 4); \
+ sprintf (name, "__amdgcn_target_id__=\"%s\"", gcn_devices[gcn_arch].name); \
+ builtin_define (name); \
+ name = (char *)xmalloc (strlen ("__amdgcn_processor__") + \
+ strlen (gcn_devices[gcn_arch].name) + 4); \
+ sprintf (name, "__amdgcn_processor__=\"%s\"", gcn_devices[gcn_arch].name); \
+ if (gcn_devices[gcn_arch].generic_version) \
+ while ((p = strchr(name, '-'))) \
+ *p = '_'; \
+ builtin_define (name); \
} while (0)
#define ASSEMBLER_DIALECT (TARGET_RDNA2_PLUS ? 1 : 0)
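
The new TARGET_CPU_CPP_BUILTINS body derives the __gfxNNN__, __<family>__ and __amdgcn_processor__ macros from the device table, rewriting '-' to '_' for generic targets so the result is a valid identifier. A small standalone sketch of that mangling; "gfx10-3-generic" is only an illustrative generic-target name:

    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>

    /* Build "__<name>__" and, for generic targets, replace '-' with '_'
       so the result is a valid preprocessor identifier.  */
    static char *
    builtin_name (const char *device, int is_generic)
    {
      char *name = malloc (strlen (device) + 5);
      sprintf (name, "__%s__", device);
      if (is_generic)
        for (char *p = name; (p = strchr (p, '-')); )
          *p = '_';
      return name;
    }

    int main (void)
    {
      char *a = builtin_name ("gfx90a", 0);
      char *b = builtin_name ("gfx10-3-generic", 1);
      printf ("%s\n%s\n", a, b);   /* __gfx90a__ and __gfx10_3_generic__ */
      free (a);
      free (b);
      return 0;
    }
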
@@ -198,7 +209,7 @@ STATIC_ASSERT (LAST_AVGPR_REG + 1 - FIRST_AVGPR_REG == 256);
#define HARD_FRAME_POINTER_IS_ARG_POINTER 0
#define HARD_FRAME_POINTER_IS_FRAME_POINTER 0
-#define SGPR_REGNO_P(N) ((N) >= FIRST_SGPR_REG && (N) <= LAST_SGPR_REG)
+#define SGPR_REGNO_P(N) (/*(N) >= FIRST_SGPR_REG &&*/ (N) <= LAST_SGPR_REG)
#define VGPR_REGNO_P(N) ((N) >= FIRST_VGPR_REG && (N) <= LAST_VGPR_REG)
#define AVGPR_REGNO_P(N) ((N) >= FIRST_AVGPR_REG && (N) <= LAST_AVGPR_REG)
#define SSRC_REGNO_P(N) ((N) <= SCC_REG && (N) != VCCZ_REG)
@@ -581,8 +592,7 @@ enum gcn_address_spaces
c_register_addr_space ("__global", ADDR_SPACE_GLOBAL); \
} while (0);
-#define STACK_ADDR_SPACE \
- (TARGET_GCN5_PLUS ? ADDR_SPACE_GLOBAL : ADDR_SPACE_FLAT)
+#define STACK_ADDR_SPACE ADDR_SPACE_GLOBAL
#define DEFAULT_ADDR_SPACE \
((cfun && cfun->machine && !cfun->machine->use_flat_addressing) \
? ADDR_SPACE_GLOBAL : ADDR_SPACE_FLAT)
diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md
index f223ec9..8b6ae85 100644
--- a/gcc/config/gcn/gcn.md
+++ b/gcc/config/gcn/gcn.md
@@ -286,7 +286,7 @@
; Disable alternatives that only apply to specific ISA variants.
-(define_attr "gcn_version" "gcn3,gcn5,cdna2" (const_string "gcn3"))
+(define_attr "cdna" "any,cdna2" (const_string "any"))
(define_attr "rdna" "any,no,yes" (const_string "any"))
(define_attr "xnack" "na,off,on" (const_string "na"))
@@ -298,10 +298,7 @@
(and (eq_attr "rdna" "yes")
(eq (symbol_ref "TARGET_RDNA2_PLUS") (const_int 0)))
(const_int 0)
- (and (eq_attr "gcn_version" "gcn5")
- (eq (symbol_ref "TARGET_GCN5_PLUS") (const_int 0)))
- (const_int 0)
- (and (eq_attr "gcn_version" "cdna2")
+ (and (eq_attr "cdna" "cdna2")
(eq (symbol_ref "TARGET_CDNA2_PLUS") (const_int 0)))
(const_int 0)
(and (eq_attr "xnack" "off")
@@ -568,7 +565,7 @@
[(set (match_operand:SISF 0 "nonimmediate_operand")
(match_operand:SISF 1 "gcn_load_operand"))]
""
- {@ [cons: =0, 1; attrs: type, exec, length, gcn_version, xnack]
+ {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack]
[SD ,SSA ;sop1 ,* ,4 ,* ,* ] s_mov_b32\t%0, %1
[SD ,J ;sopk ,* ,4 ,* ,* ] s_movk_i32\t%0, %1
[SD ,B ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1
@@ -609,7 +606,7 @@
[(set (match_operand:QIHI 0 "nonimmediate_operand")
(match_operand:QIHI 1 "gcn_load_operand"))]
"gcn_valid_move_p (<MODE>mode, operands[0], operands[1])"
- {@ [cons: =0, 1; attrs: type, exec, length, gcn_version, xnack]
+ {@ [cons: =0, 1; attrs: type, exec, length, cdna, xnack]
[SD ,SSA ;sop1 ,* ,4 ,* ,* ] s_mov_b32\t%0, %1
[SD ,J ;sopk ,* ,4 ,* ,* ] s_movk_i32\t%0, %1
[SD ,B ;sop1 ,* ,8 ,* ,* ] s_mov_b32\t%0, %1
@@ -642,7 +639,7 @@
[(set (match_operand:DIDF 0 "nonimmediate_operand")
(match_operand:DIDF 1 "general_operand"))]
"GET_CODE(operands[1]) != SYMBOL_REF"
- {@ [cons: =0, 1; attrs: type, length, gcn_version, xnack]
+ {@ [cons: =0, 1; attrs: type, length, cdna, xnack]
[SD ,SSA ;sop1 ,4 ,* ,* ] s_mov_b64\t%0, %1
[SD ,C ;sop1 ,8 ,* ,* ] ^
[SD ,DB ;mult ,* ,* ,* ] #
@@ -707,7 +704,7 @@
[(set (match_operand:TI 0 "nonimmediate_operand")
(match_operand:TI 1 "general_operand" ))]
""
- {@ [cons: =0, 1; attrs: type, delayeduse, length, gcn_version, xnack]
+ {@ [cons: =0, 1; attrs: type, delayeduse, length, cdna, xnack]
[SD ,SSB;mult ,* ,* ,* ,* ] #
[RS ,Sm ;smem ,* ,12,* ,* ] s_store_dwordx4\t%1, %A0
[Sm ,RS ;smem ,yes,12,* ,off] s_load_dwordx4\t%0, %A1\;s_waitcnt\tlgkmcnt(0)
@@ -1137,7 +1134,7 @@
s_add_i32\t%0, %1, %2
s_addk_i32\t%0, %2
s_add_i32\t%0, %1, %2
- v_add%^_u32\t%0, vcc, %2, %1"
+ v_add_co_u32\t%0, vcc, %2, %1"
[(set_attr "type" "sop2,sopk,sop2,vop2")
(set_attr "length" "4,4,8,8")])
@@ -1217,7 +1214,7 @@
""
"@
s_add_u32\t%0, %1, %2
- v_add%^_u32\t%0, vcc, %2, %1"
+ v_add_co_u32\t%0, vcc, %2, %1"
[(set_attr "type" "sop2,vop2")
(set_attr "length" "8,8")])
@@ -1232,7 +1229,7 @@
"INTVAL (operands[2]) == -INTVAL (operands[3])"
"@
s_add_u32\t%0, %1, %2
- v_add%^_u32\t%0, vcc, %2, %1"
+ v_add_co_u32\t%0, vcc, %2, %1"
[(set_attr "type" "sop2,vop2")
(set_attr "length" "4")])
@@ -1254,7 +1251,7 @@
""
"@
s_addc_u32\t%0, %1, %2
- {v_addc%^_u32|v_add_co_ci_u32}\t%0, vcc, %2, %1, vcc"
+ {v_addc_co_u32|v_add_co_ci_u32}\t%0, vcc, %2, %1, vcc"
[(set_attr "type" "sop2,vop2")
(set_attr "length" "8,4")])
@@ -1270,7 +1267,7 @@
""
"@
s_addc_u32\t%0, %1, 0
- {v_addc%^_u32|v_add_co_ci_u32}\t%0, vcc, 0, %1, vcc"
+ {v_addc_co_u32|v_add_co_ci_u32}\t%0, vcc, 0, %1, vcc"
[(set_attr "type" "sop2,vop2")
(set_attr "length" "4")])
@@ -1299,8 +1296,8 @@
rtx new_operands[4] = { operands[0], operands[1], operands[2],
gen_rtx_REG (DImode, CC_SAVE_REG) };
- output_asm_insn ("v_add%^_u32\t%L0, %3, %L2, %L1", new_operands);
- output_asm_insn ("{v_addc%^_u32|v_add_co_ci_u32}\t%H0, %3, %H2, %H1, %3",
+ output_asm_insn ("v_add_co_u32\t%L0, %3, %L2, %L1", new_operands);
+ output_asm_insn ("{v_addc_co_u32|v_add_co_ci_u32}\t%H0, %3, %H2, %H1, %3",
new_operands);
}
else
@@ -1332,8 +1329,8 @@
"@
s_sub_i32\t%0, %1, %2
s_sub_i32\t%0, %1, %2
- v_subrev%^_u32\t%0, vcc, %2, %1
- v_sub%^_u32\t%0, vcc, %1, %2"
+ v_subrev_co_u32\t%0, vcc, %2, %1
+ v_sub_co_u32\t%0, vcc, %1, %2"
[(set_attr "type" "sop2,sop2,vop2,vop2")
(set_attr "length" "4,8,8,8")])
@@ -1462,11 +1459,6 @@
(const_int 32))))]
""
{
- if (can_create_pseudo_p ()
- && !TARGET_MULTIPLY_IMMEDIATE
- && !gcn_inline_immediate_operand (operands[2], SImode))
- operands[2] = force_reg (SImode, operands[2]);
-
if (REG_P (operands[2]))
emit_insn (gen_<su>mulsi3_highpart_reg (operands[0], operands[1],
operands[2]));
@@ -1492,8 +1484,7 @@
s_mul_hi<sgnsuffix>0\t%0, %1, %2
v_mul_hi<sgnsuffix>0\t%0, %2, %1"
[(set_attr "type" "sop2,vop3a")
- (set_attr "length" "4,8")
- (set_attr "gcn_version" "gcn5,*")])
+ (set_attr "length" "4,8")])
(define_insn "<su>mulsi3_highpart_imm"
[(set (match_operand:SI 0 "register_operand" "=Sg,Sg,v")
@@ -1504,15 +1495,13 @@
(match_operand:SI 1 "register_operand" "Sg,Sg,v"))
(match_operand:DI 2 "gcn_32bit_immediate_operand" "A, B,A"))
(const_int 32))))]
- "TARGET_MULTIPLY_IMMEDIATE
- || gcn_inline_immediate_operand (operands[2], SImode)"
+ ""
"@
s_mul_hi<sgnsuffix>0\t%0, %1, %2
s_mul_hi<sgnsuffix>0\t%0, %1, %2
v_mul_hi<sgnsuffix>0\t%0, %2, %1"
[(set_attr "type" "sop2,sop2,vop3a")
- (set_attr "length" "4,8,8")
- (set_attr "gcn_version" "gcn5,gcn5,*")])
+ (set_attr "length" "4,8,8")])
(define_expand "<su>mulsidi3"
[(set (match_operand:DI 0 "register_operand" "")
@@ -1522,11 +1511,6 @@
(match_operand:SI 2 "nonmemory_operand" ""))))]
""
{
- if (can_create_pseudo_p ()
- && !TARGET_MULTIPLY_IMMEDIATE
- && !gcn_inline_immediate_operand (operands[2], SImode))
- operands[2] = force_reg (SImode, operands[2]);
-
if (REG_P (operands[2]))
emit_insn (gen_<su>mulsidi3_reg (operands[0], operands[1], operands[2]));
else
@@ -1551,8 +1535,7 @@
emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2]));
emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2]));
DONE;
- }
- [(set_attr "gcn_version" "gcn5,*")])
+ })
(define_insn_and_split "<su>mulsidi3_imm"
[(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg,&v")
@@ -1560,8 +1543,7 @@
(match_operand:SI 1 "register_operand" "Sg, Sg, v"))
(match_operand:DI 2 "gcn_32bit_immediate_operand"
"A, B, A")))]
- "TARGET_MULTIPLY_IMMEDIATE
- || gcn_inline_immediate_operand (operands[2], SImode)"
+ ""
"#"
"&& reload_completed"
[(const_int 0)]
@@ -1571,8 +1553,7 @@
emit_insn (gen_mulsi3 (dstlo, operands[1], operands[2]));
emit_insn (gen_<su>mulsi3_highpart (dsthi, operands[1], operands[2]));
DONE;
- }
- [(set_attr "gcn_version" "gcn5,gcn5,*")])
+ })
(define_insn_and_split "muldi3"
[(set (match_operand:DI 0 "register_operand" "=&Sg,&Sg, &v,&v")
@@ -1606,8 +1587,7 @@
add = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (3, add, clob1, clob2));
emit_insn (add);
DONE;
- }
- [(set_attr "gcn_version" "gcn5,gcn5,*,*")])
+ })
(define_insn "<u>mulhisi3"
[(set (match_operand:SI 0 "register_operand" "=v")
@@ -1994,8 +1974,7 @@
flat_atomic_<bare_mnemonic><X>\t%0, %1, %2 glc\;s_waitcnt\t0
global_atomic_<bare_mnemonic><X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat")
- (set_attr "length" "12")
- (set_attr "gcn_version" "gcn5,*,gcn5")])
+ (set_attr "length" "12")])
; FIXME: These patterns are disabled because the instructions don't
; seem to work as advertised. Specifically, OMP "team distribute"
@@ -2016,8 +1995,7 @@
flat_atomic_<bare_mnemonic><X>\t%0, %1\;s_waitcnt\t0
global_atomic_<bare_mnemonic><X>\t%A0, %1%O0\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat")
- (set_attr "length" "12")
- (set_attr "gcn_version" "gcn5,*,gcn5")])
+ (set_attr "length" "12")])
(define_mode_attr x2 [(SI "DI") (DI "TI")])
(define_mode_attr size [(SI "4") (DI "8")])
@@ -2064,7 +2042,6 @@
global_atomic_cmpswap<X>\t%0, %A1, %2%O1 glc\;s_waitcnt\tvmcnt(0)"
[(set_attr "type" "smem,flat,flat")
(set_attr "length" "12")
- (set_attr "gcn_version" "gcn5,*,gcn5")
(set_attr "delayeduse" "*,yes,yes")])
(define_insn "sync_compare_and_swap<mode>_lds_insn"
@@ -2174,7 +2151,6 @@
}
[(set_attr "type" "smem,flat,flat")
(set_attr "length" "28")
- (set_attr "gcn_version" "gcn5,*,gcn5")
(set_attr "rdna" "no,*,*")])
(define_insn "atomic_store<mode>"
@@ -2249,7 +2225,6 @@
}
[(set_attr "type" "smem,flat,flat")
(set_attr "length" "28")
- (set_attr "gcn_version" "gcn5,*,gcn5")
(set_attr "rdna" "no,*,*")])
(define_insn "atomic_exchange<mode>"
@@ -2362,7 +2337,6 @@
}
[(set_attr "type" "smem,flat,flat")
(set_attr "length" "28")
- (set_attr "gcn_version" "gcn5,*,gcn5")
(set_attr "rdna" "no,*,*")])
;; }}}
diff --git a/gcc/config/gcn/gcn.opt b/gcc/config/gcn/gcn.opt
index 3317c49..57e344e 100644
--- a/gcc/config/gcn/gcn.opt
+++ b/gcc/config/gcn/gcn.opt
@@ -21,46 +21,12 @@
HeaderInclude
config/gcn/gcn-opts.h
-Enum
-Name(gpu_type) Type(enum processor_type)
-GCN GPU type to use:
-
-EnumValue
-Enum(gpu_type) String(fiji) Value(PROCESSOR_FIJI)
-
-EnumValue
-Enum(gpu_type) String(gfx900) Value(PROCESSOR_VEGA10)
-
-EnumValue
-Enum(gpu_type) String(gfx906) Value(PROCESSOR_VEGA20)
-
-EnumValue
-Enum(gpu_type) String(gfx908) Value(PROCESSOR_GFX908)
-
-EnumValue
-Enum(gpu_type) String(gfx90a) Value(PROCESSOR_GFX90a)
-
-EnumValue
-Enum(gpu_type) String(gfx90c) Value(PROCESSOR_GFX90c)
-
-EnumValue
-Enum(gpu_type) String(gfx1030) Value(PROCESSOR_GFX1030)
-
-EnumValue
-Enum(gpu_type) String(gfx1036) Value(PROCESSOR_GFX1036)
-
-EnumValue
-Enum(gpu_type) String(gfx1100) Value(PROCESSOR_GFX1100)
-
-EnumValue
-Enum(gpu_type) String(gfx1103) Value(PROCESSOR_GFX1103)
-
march=
-Target RejectNegative Negative(march=) Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_FIJI)
+Target RejectNegative Negative(march=) Joined ToLower Enum(gpu_type) Var(gcn_arch) Init(PROCESSOR_GFX900)
Specify the name of the target GPU.
mtune=
-Target RejectNegative Negative(mtune=) Joined ToLower Enum(gpu_type) Var(gcn_tune) Init(PROCESSOR_FIJI)
+Target RejectNegative Negative(mtune=) Joined ToLower Enum(gpu_type) Var(gcn_tune) Init(PROCESSOR_GFX900)
Specify the name of the target GPU.
m32
@@ -117,7 +83,7 @@ Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_xnack) Init(
Compile for devices requiring XNACK enabled. Default \"any\" if USM is supported.
msram-ecc=
-Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_sram_ecc) Init(HSACO_ATTR_ANY)
+Target RejectNegative Joined ToLower Enum(hsaco_attr_type) Var(flag_sram_ecc) Init(HSACO_ATTR_DEFAULT)
Compile for devices with the SRAM ECC feature enabled, or not. Default \"any\".
-param=gcn-preferred-vectorization-factor=
diff --git a/gcc/config/gcn/gen-gcn-device-macros.awk b/gcc/config/gcn/gen-gcn-device-macros.awk
new file mode 100644
index 0000000..5ecc5c4
--- /dev/null
+++ b/gcc/config/gcn/gen-gcn-device-macros.awk
@@ -0,0 +1,129 @@
+# Generate $objdir/gcn-device-macros.h from gcn-devices.def
+#
+# Copyright (C) 2024 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+BEGIN {
+ FS= "[(,] *"
+
+ print "/* Generated by gen-gcn-device-macros.awk from gcn-devices.def."
+ print " Do not edit. */"
+
+ list=""
+ generic_list=""
+}
+
+/^GCN_DEVICE\(/ {
+ gfx=$2
+ NAME=$3
+ list=(list " OPT_" NAME)
+
+ print ""
+ next
+}
+
+/XNACK default.*HSACO_ATTR_UNSUPPORTED/ {
+ printf "\n#define XNACK_%s \"march=%s:;\"", NAME, gfx
+ next
+}
+
+/XNACK default.*HSACO_ATTR_OFF/ {
+ printf "\n#define XNACK_%s \"march=%s:%{!mxnack*|mxnack=default|mxnack=off:-mattr=-xnack;mxnack=on:-mattr=+xnack};\"", NAME, gfx
+ next
+}
+
+/XNACK default.*HSACO_ATTR_ANY/ {
+ printf "\n#define XNACK_%s \"march=%s:%{mxnack=off:-mattr=-xnack;mxnack=on:-mattr=+xnack};\"", NAME, gfx
+ next
+}
+
+/XNACK default.*HSACO/ {
+ print FILENAME ":" NR ": error: unhandled HSACO default at line (gen-gcn-device-macros.awk)" > "/dev/stderr"
+ exit 1
+}
+
+/SRAM_ECC default.*HSACO_ATTR_UNSUPPORTED/ {
+ printf "\n#define SRAM_%s \"march=%s:;\"", NAME, gfx
+ next
+}
+
+/SRAM_ECC default.*HSACO_ATTR_ANY/ {
+ printf "\n#define SRAM_%s \"march=%s:%{msram-ecc=on:-mattr=+sramecc;msram-ecc=off:-mattr=-sramecc};\"", NAME, gfx
+ next
+}
+
+/SRAM_ECC default.*HSACO/ {
+ print FILENAME ":" NR ": error: unhandled HSACO default at line (gen-gcn-device-macros.awk)" > "/dev/stderr"
+ exit 1
+}
+
+/WAVE64 mode.*HSACO_ATTR_UNSUPPORTED/ {
+ printf "\n#define WAVE64_%s \"march=%s:;\"", NAME, gfx
+ next
+}
+
+/WAVE64 mode.*HSACO_ATTR_ON/ {
+ printf "\n#define WAVE64_%s \"march=%s:-mattr=+wavefrontsize64;\"", NAME, gfx
+ next
+}
+
+/WAVE64 mode.*HSACO/ {
+ print FILENAME ":" NR ": error: unhandled HSACO default at line (gen-gcn-device-macros.awk)" > "/dev/stderr"
+ exit 1
+}
+
+/CU mode.*HSACO_ATTR_UNSUPPORTED/ {
+ printf "\n#define CU_%s \"march=%s:;\"", NAME, gfx
+ next
+}
+
+/CU mode.*HSACO_ATTR_ON/ {
+ printf "\n#define CU_%s \"march=%s:-mattr=+cumode;\"", NAME, gfx
+ next
+}
+
+/CU mode.*HSACO/ {
+ print FILENAME ":" NR ": error: unhandled HSACO default at line (gen-gcn-device-macros.awk)" > "/dev/stderr"
+ exit 1
+}
+
+/Generic code obj version/ {
+ match($0,/Generic code obj version[^\/]*\/[\t ]*([0-9]+)/,m)
+ if (m[1] > 0) {
+ printf "\n#define GENERIC_%s \"march=%s:--amdhsa-code-object-version=6;\"", NAME, gfx
+ generic_list=(generic_list " GENERIC_" NAME)
+ }
+ next
+}
+
+# ABI Version: In principle, the LLVM default would work. However,
+# when debugging symbols are turned on, mkoffload.cc
+# writes a new AMD GPU object file and the ABI version needs to be the
+# same. - LLVM <= 17 defaults to 4 while LLVM >= 18 defaults to 5.
+# GCC supports LLVM >= 13.0.1 and only LLVM >= 14 supports version 5.
+# Code object V6 is supported since LLVM 19.
+
+END {
+ print ""
+ print ""
+ printf "#define ABI_VERSION_OPT \"%%{\"%s \"!march=*|march=*:--amdhsa-code-object-version=4} \"\n", generic_list
+ printf "#define XNACKOPT \"%%{\"%s \":%%eexpected march\\n} \"\n", gensub (/OPT/, "XNACK", "g", list)
+ printf "#define SRAMOPT \"%%{\"%s \":%%eexpected march\\n} \"\n", gensub (/OPT/, "SRAM", "g", list)
+ printf "#define WAVE64OPT \"%%{\"%s \":%%eexpected march\\n} \"\n", gensub (/OPT/, "WAVE64", "g", list)
+ printf "#define CUMODEOPT \"%%{\"%s \":%%eexpected march\\n} \"\n", gensub (/OPT/, "CU", "g", list)
+}
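
To make the script's output concrete: for each device it emits a per-feature spec-string macro, and the END rule strings them together into XNACKOPT, SRAMOPT, WAVE64OPT and CUMODEOPT. A hedged sketch of what the generated header might contain for two devices, one with XNACK default "off" and one where XNACK is unsupported (the strings are wrapped here only for readability; the real values come from gcn-devices.def):

    #include <stdio.h>

    /* Hypothetical output of gen-gcn-device-macros.awk for two devices.  */
    #define XNACK_GFX900 "march=gfx900:%{!mxnack*|mxnack=default|mxnack=off:" \
                         "-mattr=-xnack;mxnack=on:-mattr=+xnack};"
    #define XNACK_GFX1030 "march=gfx1030:;"

    /* The END rule concatenates all per-device macros into one spec.  */
    #define XNACKOPT "%{" XNACK_GFX900 XNACK_GFX1030 ":%eexpected march\n} "

    int main (void)
    {
      /* The driver sees one long %{...} spec that selects the right
         -mattr= options for each -march= value.  */
      puts (XNACKOPT);
      return 0;
    }

String-literal concatenation is what lets the per-device macros be pasted side by side into a single driver spec.
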
diff --git a/gcc/config/gcn/gen-opt-tables.awk b/gcc/config/gcn/gen-opt-tables.awk
new file mode 100644
index 0000000..9fbe4cf
--- /dev/null
+++ b/gcc/config/gcn/gen-opt-tables.awk
@@ -0,0 +1,55 @@
+# Generate gcn-tables.opt from gcn-devices.def
+#
+# Copyright (C) 2024 Free Software Foundation, Inc.
+#
+# This file is part of GCC.
+#
+# GCC is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# GCC is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with GCC; see the file COPYING3. If not see
+# <http://www.gnu.org/licenses/>.
+
+BEGIN {
+ FS= "[(,] *"
+
+ print "; -*- buffer-read-only: t -*-"
+ print "; Generated automatically by gen-opt-tables.awk from gcn-devices.def."
+ print "; Do not edit."
+ print ""
+ print "; Copyright (C) 2024 Free Software Foundation, Inc."
+ print ""
+ print "; This file is part of GCC."
+ print ""
+ print "; GCC is free software; you can redistribute it and/or modify"
+ print "; it under the terms of the GNU General Public License as"
+ print "; published by the Free Software Foundation; either version 3,"
+ print "; or (at your option) any later version."
+ print ""
+ print "; GCC is distributed in the hope that it will be useful,"
+ print "; but WITHOUT ANY WARRANTY; without even the implied warranty of"
+ print "; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the"
+ print "; GNU General Public License for more details."
+ print ""
+ print "; You should have received a copy of the GNU General Public"
+ print "; License along with GCC; see the file COPYING3. If not see"
+ print "; <http://www.gnu.org/licenses/>."
+ print ""
+ print "Enum"
+ print "Name(gpu_type) Type(enum processor_type)"
+ print "GCN GPU type to use:"
+}
+
+/^GCN_DEVICE\(/ {
+ print ""
+ print "EnumValue"
+ print "Enum(gpu_type) String(" $2 ") Value(PROCESSOR_" $3 ")"
+}
diff --git a/gcc/config/gcn/mkoffload.cc b/gcc/config/gcn/mkoffload.cc
index 810298a..17a3342 100644
--- a/gcc/config/gcn/mkoffload.cc
+++ b/gcc/config/gcn/mkoffload.cc
@@ -24,6 +24,7 @@
This is not a complete assembler. We presume the source is well
formed from the compiler and can die horribly if it is not. */
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -48,27 +49,16 @@
#define ELFABIVERSION_AMDGPU_HSA_V3 1
#undef ELFABIVERSION_AMDGPU_HSA_V4
#define ELFABIVERSION_AMDGPU_HSA_V4 2
+#undef ELFABIVERSION_AMDGPU_HSA_V6
+#define ELFABIVERSION_AMDGPU_HSA_V6 4
-#undef EF_AMDGPU_MACH_AMDGCN_GFX803
-#define EF_AMDGPU_MACH_AMDGCN_GFX803 0x2a
-#undef EF_AMDGPU_MACH_AMDGCN_GFX900
-#define EF_AMDGPU_MACH_AMDGCN_GFX900 0x2c
-#undef EF_AMDGPU_MACH_AMDGCN_GFX906
-#define EF_AMDGPU_MACH_AMDGCN_GFX906 0x2f
-#undef EF_AMDGPU_MACH_AMDGCN_GFX908
-#define EF_AMDGPU_MACH_AMDGCN_GFX908 0x30
-#undef EF_AMDGPU_MACH_AMDGCN_GFX90a
-#define EF_AMDGPU_MACH_AMDGCN_GFX90a 0x3f
-#undef EF_AMDGPU_MACH_AMDGCN_GFX90c
-#define EF_AMDGPU_MACH_AMDGCN_GFX90c 0x32
-#undef EF_AMDGPU_MACH_AMDGCN_GFX1030
-#define EF_AMDGPU_MACH_AMDGCN_GFX1030 0x36
-#undef EF_AMDGPU_MACH_AMDGCN_GFX1036
-#define EF_AMDGPU_MACH_AMDGCN_GFX1036 0x45
-#undef EF_AMDGPU_MACH_AMDGCN_GFX1100
-#define EF_AMDGPU_MACH_AMDGCN_GFX1100 0x41
-#undef EF_AMDGPU_MACH_AMDGCN_GFX1103
-#define EF_AMDGPU_MACH_AMDGCN_GFX1103 0x44
+/* Extract the EF_AMDGPU_MACH_AMDGCN_GFXnnn from the def file. */
+enum elf_arch_code {
+#define GCN_DEVICE(name, NAME, ELF_ARCH, ...) \
+ EF_AMDGPU_MACH_AMDGCN_ ## NAME = ELF_ARCH,
+#include "gcn-devices.def"
+#undef GCN_DEVICE
+};
#define EF_AMDGPU_FEATURE_XNACK_V4 0x300 /* Mask. */
#define EF_AMDGPU_FEATURE_XNACK_UNSUPPORTED_V4 0x000
@@ -82,6 +72,9 @@
#define EF_AMDGPU_FEATURE_SRAMECC_OFF_V4 0x800
#define EF_AMDGPU_FEATURE_SRAMECC_ON_V4 0xc00
+#define EF_AMDGPU_GENERIC_VERSION_V 0xff000000 /* Mask. */
+#define EF_AMDGPU_GENERIC_VERSION_OFFSET 24
+
#define SET_XNACK_ON(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
| EF_AMDGPU_FEATURE_XNACK_ON_V4)
#define SET_XNACK_ANY(VAR) VAR = ((VAR & ~EF_AMDGPU_FEATURE_XNACK_V4) \
@@ -113,6 +106,12 @@
== EF_AMDGPU_FEATURE_SRAMECC_ON_V4)
#define TEST_SRAM_ECC_UNSET(VAR) ((VAR & EF_AMDGPU_FEATURE_SRAMECC_V4) == 0)
+#define GET_GENERIC_VERSION(VAR) ((VAR & EF_AMDGPU_GENERIC_VERSION_V) \
+ >> EF_AMDGPU_GENERIC_VERSION_OFFSET)
+#define SET_GENERIC_VERSION(VAR,GEN_VER) \
+ VAR = ((VAR & ~EF_AMDGPU_GENERIC_VERSION_V) \
+ | (GEN_VER << EF_AMDGPU_GENERIC_VERSION_OFFSET))
+
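
A small worked check of the new generic-version accessors: the version occupies the top byte of e_flags (mask 0xff000000, shift 24), so a SET followed by a GET round-trips and leaves the lower feature bits untouched. The starting elf_flags value below is illustrative:

    #include <assert.h>
    #include <stdint.h>
    #include <stdio.h>

    #define EF_AMDGPU_GENERIC_VERSION_V      0xff000000 /* Mask.  */
    #define EF_AMDGPU_GENERIC_VERSION_OFFSET 24

    #define GET_GENERIC_VERSION(VAR) ((VAR & EF_AMDGPU_GENERIC_VERSION_V) \
                                      >> EF_AMDGPU_GENERIC_VERSION_OFFSET)
    #define SET_GENERIC_VERSION(VAR,GEN_VER) \
      VAR = ((VAR & ~EF_AMDGPU_GENERIC_VERSION_V) \
             | (GEN_VER << EF_AMDGPU_GENERIC_VERSION_OFFSET))

    int main (void)
    {
      uint32_t elf_flags = 0x00000100;    /* Illustrative low feature bits.  */
      SET_GENERIC_VERSION (elf_flags, 1); /* Generic code object version 1.  */
      assert (GET_GENERIC_VERSION (elf_flags) == 1);
      assert ((elf_flags & ~EF_AMDGPU_GENERIC_VERSION_V) == 0x00000100);
      printf ("e_flags = 0x%08x\n", elf_flags);
      return 0;
    }
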
#ifndef R_AMDGPU_NONE
#define R_AMDGPU_NONE 0
#define R_AMDGPU_ABS32_LO 1 /* (S + A) & 0xFFFFFFFF */
@@ -135,6 +134,8 @@ static const char *gcn_dumpbase;
static struct obstack files_to_cleanup;
enum offload_abi offload_abi = OFFLOAD_ABI_UNSET;
+const char *offload_abi_host_opts = NULL;
+
uint32_t elf_arch = EF_AMDGPU_MACH_AMDGCN_GFX900; // Default GPU architecture.
uint32_t elf_flags = EF_AMDGPU_FEATURE_SRAMECC_UNSUPPORTED_V4;
@@ -182,44 +183,6 @@ xputenv (const char *string)
putenv (CONST_CAST (char *, string));
}
-/* Read the whole input file. It will be NUL terminated (but
- remember, there could be a NUL in the file itself. */
-
-static const char *
-read_file (FILE *stream, size_t *plen)
-{
- size_t alloc = 16384;
- size_t base = 0;
- char *buffer;
-
- if (!fseek (stream, 0, SEEK_END))
- {
- /* Get the file size. */
- long s = ftell (stream);
- if (s >= 0)
- alloc = s + 100;
- fseek (stream, 0, SEEK_SET);
- }
- buffer = XNEWVEC (char, alloc);
-
- for (;;)
- {
- size_t n = fread (buffer + base, 1, alloc - base - 1, stream);
-
- if (!n)
- break;
- base += n;
- if (base + 1 == alloc)
- {
- alloc *= 2;
- buffer = XRESIZEVEC (char, buffer, alloc);
- }
- }
- buffer[base] = 0;
- *plen = base;
- return buffer;
-}
-
/* Parse STR, saving found tokens into PVALUES and return their number.
Tokens are assumed to be delimited by ':'. */
@@ -352,18 +315,14 @@ copy_early_debug_info (const char *infile, const char *outfile)
/* We only support host relocations of x86_64, for now. */
gcc_assert (ehdr.e_machine == EM_X86_64);
- /* Fiji devices use HSACOv3 regardless of the assembler. */
- uint32_t elf_flags_actual = (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX803
- ? 0 : elf_flags);
-
/* Patch the correct elf architecture flag into the file. */
ehdr.e_ident[7] = ELFOSABI_AMDGPU_HSA;
- ehdr.e_ident[8] = (elf_arch == EF_AMDGPU_MACH_AMDGCN_GFX803
- ? ELFABIVERSION_AMDGPU_HSA_V3
+ ehdr.e_ident[8] = (GET_GENERIC_VERSION (elf_flags)
+ ? ELFABIVERSION_AMDGPU_HSA_V6
: ELFABIVERSION_AMDGPU_HSA_V4);
ehdr.e_type = ET_REL;
ehdr.e_machine = EM_AMDGPU;
- ehdr.e_flags = elf_arch | elf_flags_actual;
+ ehdr.e_flags = elf_arch | elf_flags;
/* Load the section headers so we can walk them later. */
Elf64_Shdr *sections = (Elf64_Shdr *)xmalloc (sizeof (Elf64_Shdr)
@@ -657,9 +616,11 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
struct oaccdims *dims = XOBFINISH (&dims_os, struct oaccdims *);
struct regcount *regcounts = XOBFINISH (&regcounts_os, struct regcount *);
- fprintf (cfile, "#include <stdlib.h>\n");
- fprintf (cfile, "#include <stdint.h>\n");
- fprintf (cfile, "#include <stdbool.h>\n\n");
+ if (gcn_stack_size)
+ {
+ fprintf (cfile, "#include <stdlib.h>\n");
+ fprintf (cfile, "#include <stdbool.h>\n\n");
+ }
fprintf (cfile, "static const int gcn_num_vars = %d;\n\n", var_count);
fprintf (cfile, "static const int gcn_num_ind_funcs = %d;\n\n", ind_fn_count);
@@ -725,35 +686,28 @@ process_asm (FILE *in, FILE *out, FILE *cfile)
/* Embed an object file into a C source file. */
static void
-process_obj (FILE *in, FILE *cfile, uint32_t omp_requires)
+process_obj (const char *fname_in, FILE *cfile, uint32_t omp_requires)
{
- size_t len = 0;
- const char *input = read_file (in, &len);
-
/* Dump out an array containing the binary.
- FIXME: do this with objcopy. */
- fprintf (cfile, "static unsigned char gcn_code[] = {");
- for (size_t i = 0; i < len; i += 17)
- {
- fprintf (cfile, "\n\t");
- for (size_t j = i; j < i + 17 && j < len; j++)
- fprintf (cfile, "%3u,", (unsigned char) input[j]);
- }
- fprintf (cfile, "\n};\n\n");
+     If the file is empty, a parse error is shown as the argument to if_empty
+ is an undeclared identifier. */
+ fprintf (cfile,
+ "static unsigned char gcn_code[] = {\n"
+ "#embed \"%s\" if_empty (error_file_is_empty)\n"
+ "};\n\n", fname_in);
fprintf (cfile,
"static const struct gcn_image {\n"
- " size_t size;\n"
+ " __SIZE_TYPE__ size;\n"
" void *image;\n"
"} gcn_image = {\n"
- " %zu,\n"
+ " sizeof(gcn_code),\n"
" gcn_code\n"
- "};\n\n",
- len);
+ "};\n\n");
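
process_obj now delegates the byte dump to the C23 #embed directive instead of reading the file and printing a decimal array. A minimal sketch of the technique, assuming a compiler with #embed support; "payload.bin" is a hypothetical file, and 0 is used for if_empty so the sketch still compiles on an empty file (the real code deliberately names an undeclared identifier there so an empty object file breaks the build):

    #include <stdio.h>

    /* The contents of payload.bin become the initializer of this array.  */
    static unsigned char blob[] = {
    #embed "payload.bin" if_empty (0)
    };

    int main (void)
    {
      printf ("embedded %zu bytes\n", sizeof (blob));
      return 0;
    }
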
fprintf (cfile,
"static const struct gcn_data {\n"
- " uintptr_t omp_requires_mask;\n"
+ " __UINTPTR_TYPE__ omp_requires_mask;\n"
" const struct gcn_image *gcn_image;\n"
" unsigned kernel_count;\n"
" const struct hsa_kernel_description *kernel_infos;\n"
@@ -827,17 +781,10 @@ compile_native (const char *infile, const char *outfile, const char *compiler,
obstack_ptr_grow (&argv_obstack, gcn_dumpbase);
obstack_ptr_grow (&argv_obstack, "-dumpbase-ext");
obstack_ptr_grow (&argv_obstack, ".c");
- switch (offload_abi)
- {
- case OFFLOAD_ABI_LP64:
- obstack_ptr_grow (&argv_obstack, "-m64");
- break;
- case OFFLOAD_ABI_ILP32:
- obstack_ptr_grow (&argv_obstack, "-m32");
- break;
- default:
- gcc_unreachable ();
- }
+ if (!offload_abi_host_opts)
+ fatal_error (input_location,
+ "%<-foffload-abi-host-opts%> not specified.");
+ obstack_ptr_grow (&argv_obstack, offload_abi_host_opts);
obstack_ptr_grow (&argv_obstack, infile);
obstack_ptr_grow (&argv_obstack, "-c");
obstack_ptr_grow (&argv_obstack, "-o");
@@ -853,27 +800,15 @@ compile_native (const char *infile, const char *outfile, const char *compiler,
static int
get_arch (const char *str, const char *with_arch_str)
{
- if (strcmp (str, "fiji") == 0)
- return EF_AMDGPU_MACH_AMDGCN_GFX803;
- else if (strcmp (str, "gfx900") == 0)
- return EF_AMDGPU_MACH_AMDGCN_GFX900;
- else if (strcmp (str, "gfx906") == 0)
- return EF_AMDGPU_MACH_AMDGCN_GFX906;
- else if (strcmp (str, "gfx908") == 0)
- return EF_AMDGPU_MACH_AMDGCN_GFX908;
- else if (strcmp (str, "gfx90a") == 0)
- return EF_AMDGPU_MACH_AMDGCN_GFX90a;
- else if (strcmp (str, "gfx90c") == 0)
- return EF_AMDGPU_MACH_AMDGCN_GFX90c;
- else if (strcmp (str, "gfx1030") == 0)
- return EF_AMDGPU_MACH_AMDGCN_GFX1030;
- else if (strcmp (str, "gfx1036") == 0)
- return EF_AMDGPU_MACH_AMDGCN_GFX1036;
- else if (strcmp (str, "gfx1100") == 0)
- return EF_AMDGPU_MACH_AMDGCN_GFX1100;
- else if (strcmp (str, "gfx1103") == 0)
- return EF_AMDGPU_MACH_AMDGCN_GFX1103;
-
+ /* Use the def file to map the name to the elf_arch_code. */
+ if (!str) ;
+#define GCN_DEVICE(name, NAME, ELF, ...) \
+ else if (strcmp (str, #name) == 0) \
+ return ELF;
+#include "gcn-devices.def"
+#undef GCN_DEVICE
+
+ /* else */
error ("unrecognized argument in option %<-march=%s%>", str);
/* The suggestions are based on the configured multilib support; the compiler
@@ -1008,6 +943,15 @@ main (int argc, char **argv)
"unrecognizable argument of option %<" STR "%>");
}
#undef STR
+ else if (startswith (argv[i], "-foffload-abi-host-opts="))
+ {
+ if (offload_abi_host_opts)
+ fatal_error (input_location,
+ "%<-foffload-abi-host-opts%> specified "
+ "multiple times");
+ offload_abi_host_opts
+ = argv[i] + strlen ("-foffload-abi-host-opts=");
+ }
else if (strcmp (argv[i], "-fopenmp") == 0)
fopenmp = true;
else if (strcmp (argv[i], "-fopenacc") == 0)
@@ -1072,46 +1016,49 @@ main (int argc, char **argv)
gcc_unreachable ();
}
- /* This must match gcn-hsa.h's settings for NO_XNACK, NO_SRAM_ECC
- and ASM_SPEC. */
+ /* Set the default ELF flags for XNACK. */
switch (elf_arch)
{
- case EF_AMDGPU_MACH_AMDGCN_GFX803:
- case EF_AMDGPU_MACH_AMDGCN_GFX1030:
- case EF_AMDGPU_MACH_AMDGCN_GFX1036:
- case EF_AMDGPU_MACH_AMDGCN_GFX1100:
- case EF_AMDGPU_MACH_AMDGCN_GFX1103:
- SET_XNACK_UNSET (elf_flags);
- SET_SRAM_ECC_UNSET (elf_flags);
- break;
- case EF_AMDGPU_MACH_AMDGCN_GFX900:
- SET_XNACK_OFF (elf_flags);
- SET_SRAM_ECC_UNSET (elf_flags);
- break;
- case EF_AMDGPU_MACH_AMDGCN_GFX906:
- SET_XNACK_OFF (elf_flags);
- SET_SRAM_ECC_ANY (elf_flags);
- break;
- case EF_AMDGPU_MACH_AMDGCN_GFX908:
- SET_XNACK_OFF (elf_flags);
- if (TEST_SRAM_ECC_UNSET (elf_flags))
- SET_SRAM_ECC_ANY (elf_flags);
- break;
- case EF_AMDGPU_MACH_AMDGCN_GFX90a:
- if (TEST_XNACK_UNSET (elf_flags))
- SET_XNACK_ANY (elf_flags);
- if (TEST_SRAM_ECC_UNSET (elf_flags))
- SET_SRAM_ECC_ANY (elf_flags);
- break;
- case EF_AMDGPU_MACH_AMDGCN_GFX90c:
- if (TEST_XNACK_UNSET (elf_flags))
- SET_XNACK_ANY (elf_flags);
- SET_SRAM_ECC_UNSET (elf_flags);
- break;
+#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAM, ...) \
+ case ELF: XNACK; break;
+#define HSACO_ATTR_UNSUPPORTED SET_XNACK_UNSET (elf_flags)
+#define HSACO_ATTR_OFF SET_XNACK_OFF (elf_flags)
+#define HSACO_ATTR_ANY \
+ if (TEST_XNACK_UNSET (elf_flags)) SET_XNACK_ANY (elf_flags)
+#include "gcn-devices.def"
+#undef HSACO_ATTR_UNSUPPORTED
+#undef HSACO_ATTR_OFF
+#undef HSACO_ATTR_ANY
+ default:
+ fatal_error (input_location, "unhandled architecture");
+ }
+
+ /* Set the default ELF flags for SRAM_ECC. */
+ switch (elf_arch)
+ {
+#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAM, ...) \
+ case ELF: SRAM; break;
+#define HSACO_ATTR_UNSUPPORTED SET_SRAM_ECC_UNSET (elf_flags)
+#define HSACO_ATTR_OFF SET_SRAM_ECC_OFF (elf_flags)
+#define HSACO_ATTR_ANY \
+ if (TEST_SRAM_ECC_UNSET (elf_flags)) SET_SRAM_ECC_ANY (elf_flags)
+#include "gcn-devices.def"
+#undef HSACO_ATTR_UNSUPPORTED
+#undef HSACO_ATTR_OFF
+#undef HSACO_ATTR_ANY
default:
fatal_error (input_location, "unhandled architecture");
}
+ /* Set the generic version. */
+ switch (elf_arch)
+ {
+#define GCN_DEVICE(name, NAME, ELF, ISA, XNACK, SRAMECC, WAVE64, CU, VGPRS, GEN_VER, ...) \
+ case ELF: if (GEN_VER) SET_GENERIC_VERSION (elf_flags, GEN_VER); break;
+#include "gcn-devices.def"
+#undef GCN_DEVICE
+ }
+
/* Build arguments for compiler pass. */
struct obstack cc_argv_obstack;
obstack_init (&cc_argv_obstack);
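
The default-setting switches above use a second X-macro trick: the XNACK and SRAM-ECC columns of gcn-devices.def hold HSACO_ATTR_* tokens, and mkoffload temporarily redefines those tokens as statements, so every GCN_DEVICE row expands to "case <elf-arch>: <apply default>; break;". A hedged standalone sketch with a made-up two-entry list (the ELF codes are only illustrative):

    #include <stdio.h>

    /* Stand-in for gcn-devices.def: ELF arch code and XNACK-default column.  */
    #define DEVICE_LIST \
      GCN_DEVICE (gfx900, GFX900, 0x2c, HSACO_ATTR_OFF) \
      GCN_DEVICE (gfx90a, GFX90A, 0x3f, HSACO_ATTR_ANY)

    int main (void)
    {
      int elf_arch = 0x3f;
      const char *xnack = NULL;          /* NULL means "not set yet".  */

      switch (elf_arch)
        {
          /* Redefine the attribute tokens as statements, so every row of
             the device list turns into one case of this switch.  */
    #define GCN_DEVICE(name, NAME, ELF, XNACK_DEFAULT) \
          case ELF: XNACK_DEFAULT; break;
    #define HSACO_ATTR_OFF xnack = "off"
    #define HSACO_ATTR_ANY if (xnack == NULL) xnack = "any"
          DEVICE_LIST
    #undef HSACO_ATTR_ANY
    #undef HSACO_ATTR_OFF
    #undef GCN_DEVICE
        default:
          return 1;
        }

      printf ("xnack default: %s\n", xnack);
      return 0;
    }
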
@@ -1312,13 +1259,7 @@ main (int argc, char **argv)
fork_execute (ld_argv[0], CONST_CAST (char **, ld_argv), true, ".ld_args");
obstack_free (&ld_argv_obstack, NULL);
- in = fopen (gcn_o_name, "r");
- if (!in)
- fatal_error (input_location, "cannot open intermediate gcn obj file");
-
- process_obj (in, cfile, omp_requires);
-
- fclose (in);
+ process_obj (gcn_o_name, cfile, omp_requires);
xputenv (concat ("GCC_EXEC_PREFIX=", execpath, NULL));
xputenv (concat ("COMPILER_PATH=", cpath, NULL));
diff --git a/gcc/config/gcn/t-gcn-hsa b/gcc/config/gcn/t-gcn-hsa
index 5fc34a1..5de32ae 100644
--- a/gcc/config/gcn/t-gcn-hsa
+++ b/gcc/config/gcn/t-gcn-hsa
@@ -49,3 +49,10 @@ gcn-tree.o: $(srcdir)/config/gcn/gcn-tree.cc
$(COMPILE) $<
$(POSTCOMPILE)
ALL_HOST_OBJS += gcn-tree.o
+
+$(srcdir)/config/gcn/gcn-tables.opt: $(srcdir)/config/gcn/gcn-devices.def $(srcdir)/config/gcn/gen-opt-tables.awk
+ $(AWK) -f $(srcdir)/config/gcn/gen-opt-tables.awk $< > $@
+
+gcn-device-macros.h: $(srcdir)/config/gcn/gcn-devices.def $(srcdir)/config/gcn/gen-gcn-device-macros.awk
+ $(AWK) -f $(srcdir)/config/gcn/gen-gcn-device-macros.awk $< > $@
+generated_files += gcn-device-macros.h
diff --git a/gcc/config/gcn/t-omp-device b/gcc/config/gcn/t-omp-device
index b92e19b..cae6bd3 100644
--- a/gcc/config/gcn/t-omp-device
+++ b/gcc/config/gcn/t-omp-device
@@ -1,4 +1,4 @@
-omp-device-properties-gcn: $(srcdir)/config/gcn/gcn.cc
+omp-device-properties-gcn: $(srcdir)/config/gcn/gcn-devices.def
echo kind: gpu > $@
echo arch: amdgcn gcn >> $@
- echo isa: fiji gfx803 gfx900 gfx906 gfx908 gfx90a gfx90c gfx1030 gfx1036 gfx1100 gfx1103 >> $@
+ echo isa: `grep -o -P '(?<=GCN_DEVICE\()gfx[0-9a-f]+(?=,)' $<` >> $@
diff --git a/gcc/config/h8300/h8300.cc b/gcc/config/h8300/h8300.cc
index 7ab26f2..17c6e91 100644
--- a/gcc/config/h8300/h8300.cc
+++ b/gcc/config/h8300/h8300.cc
@@ -317,7 +317,7 @@ h8300_option_override (void)
"%<-msx%> - option ignored");
}
-#ifdef H8300_LINUX
+#ifdef H8300_LINUX
if ((TARGET_NORMAL_MODE))
{
error ("%<-mn%> is not supported for linux targets");
@@ -811,7 +811,7 @@ h8300_expand_prologue (void)
if (h8300_monitor_function_p (current_function_decl))
/* The monitor function act as normal functions, which means it
- can accept parameters and return values. In addition to this,
+ can accept parameters and return values. In addition to this,
interrupts are masked in prologue and return with "rte" in epilogue. */
emit_insn (gen_monitor_prologue ());
@@ -1484,7 +1484,7 @@ h8300_print_operand (FILE *file, rtx x, int code)
if ((exact_log2 ((bitint >> 8) & 0xff)) == -1)
bitint = exact_log2 (bitint & 0xff);
else
- bitint = exact_log2 ((bitint >> 8) & 0xff);
+ bitint = exact_log2 ((bitint >> 8) & 0xff);
gcc_assert (bitint >= 0);
fprintf (file, "#%d", bitint);
break;
@@ -1493,7 +1493,7 @@ h8300_print_operand (FILE *file, rtx x, int code)
if ((exact_log2 ((bitint >> 8) & 0xff)) == -1 )
bitint = exact_log2 (bitint & 0xff);
else
- bitint = (exact_log2 ((bitint >> 8) & 0xff));
+ bitint = (exact_log2 ((bitint >> 8) & 0xff));
gcc_assert (bitint >= 0);
fprintf (file, "#%d", bitint);
break;
@@ -2358,7 +2358,7 @@ h8300_bitfield_length (rtx op, rtx op2)
if (GET_CODE (op) == REG)
op = op2;
gcc_assert (GET_CODE (op) != REG);
-
+
size = GET_MODE_SIZE (GET_MODE (op));
operand_length = h8300_classify_operand (op, size, &opclass);
@@ -2521,7 +2521,7 @@ h8300_insn_length_from_table (rtx_insn *insn, rtx * operands)
case LENGTH_TABLE_BITFIELD:
return h8300_bitfield_length (operands[0], operands[1]);
-
+
case LENGTH_TABLE_BITBRANCH:
return h8300_bitfield_length (operands[1], operands[2]) - 2;
@@ -4100,7 +4100,7 @@ output_a_shift (rtx operands[4], rtx_code code)
/* This case must be taken care of by one of the two splitters
that convert a variable shift into a loop. */
gcc_assert (GET_CODE (operands[2]) == CONST_INT);
-
+
n = INTVAL (operands[2]);
/* If the count is negative, make it 0. */
@@ -4113,7 +4113,7 @@ output_a_shift (rtx operands[4], rtx_code code)
n = GET_MODE_BITSIZE (mode);
get_shift_alg (shift_type, shift_mode, n, &info);
-
+
switch (info.alg)
{
case SHIFT_SPECIAL:
@@ -4134,7 +4134,7 @@ output_a_shift (rtx operands[4], rtx_code code)
for (; n > 0; n--)
output_asm_insn (info.shift1, operands);
return "";
-
+
case SHIFT_ROT_AND:
{
int m = GET_MODE_BITSIZE (mode) - n;
@@ -4146,18 +4146,18 @@ output_a_shift (rtx operands[4], rtx_code code)
/* Not all possibilities of rotate are supported. They shouldn't
be generated, but let's watch for 'em. */
gcc_assert (info.shift1);
-
+
/* Emit two bit rotates first. */
if (info.shift2 != NULL)
{
for (; m > 1; m -= 2)
output_asm_insn (info.shift2, operands);
}
-
+
/* Now single bit rotates for any residual. */
for (; m > 0; m--)
output_asm_insn (info.shift1, operands);
-
+
/* Now mask off the high bits. */
switch (mode)
{
@@ -4201,7 +4201,7 @@ output_a_shift (rtx operands[4], rtx_code code)
fprintf (asm_out_file, "\tbne .Llt%d\n", loopend_lab);
}
return "";
-
+
default:
gcc_unreachable ();
}
@@ -4381,7 +4381,7 @@ compute_a_shift_cc (rtx operands[3], rtx_code code)
enum shift_mode shift_mode;
struct shift_info info;
int n;
-
+
switch (mode)
{
case E_QImode:
@@ -4415,7 +4415,7 @@ compute_a_shift_cc (rtx operands[3], rtx_code code)
/* This case must be taken care of by one of the two splitters
that convert a variable shift into a loop. */
gcc_assert (GET_CODE (operands[2]) == CONST_INT);
-
+
n = INTVAL (operands[2]);
/* If the count is negative, make it 0. */
@@ -4426,9 +4426,9 @@ compute_a_shift_cc (rtx operands[3], rtx_code code)
do the intuitive thing. */
else if ((unsigned int) n > GET_MODE_BITSIZE (mode))
n = GET_MODE_BITSIZE (mode);
-
+
get_shift_alg (shift_type, shift_mode, n, &info);
-
+
switch (info.alg)
{
case SHIFT_SPECIAL:
@@ -4441,11 +4441,11 @@ compute_a_shift_cc (rtx operands[3], rtx_code code)
case SHIFT_INLINE:
return (info.cc_inline == OLD_CC_SET_ZN
|| info.cc_inline == OLD_CC_SET_ZNV);
-
+
case SHIFT_ROT_AND:
/* This case always ends with an and instruction. */
return true;
-
+
case SHIFT_LOOP:
/* A loop to shift by a "large" constant value.
If we have shift-by-2 insns, use them. */
@@ -4454,10 +4454,10 @@ compute_a_shift_cc (rtx operands[3], rtx_code code)
if (n % 2)
return (info.cc_inline == OLD_CC_SET_ZN
|| info.cc_inline == OLD_CC_SET_ZNV);
-
+
}
return false;
-
+
default:
gcc_unreachable ();
}
diff --git a/gcc/config/host-darwin.h b/gcc/config/host-darwin.h
index ecf454e..23752c4 100644
--- a/gcc/config/host-darwin.h
+++ b/gcc/config/host-darwin.h
@@ -18,7 +18,7 @@
<http://www.gnu.org/licenses/>. */
extern void * darwin_gt_pch_get_address (size_t sz, int fd);
-extern int darwin_gt_pch_use_address (void *&addr, size_t sz, int fd,
+extern int darwin_gt_pch_use_address (void *&addr, size_t sz, int fd,
size_t off);
#undef HOST_HOOKS_GT_PCH_GET_ADDRESS
diff --git a/gcc/config/host-linux.cc b/gcc/config/host-linux.cc
index 1cec56e..c1214b3 100644
--- a/gcc/config/host-linux.cc
+++ b/gcc/config/host-linux.cc
@@ -105,7 +105,7 @@
#endif
/* Determine a location where we might be able to reliably allocate SIZE
- bytes. FD is the PCH file, though we should return with the file
+ bytes. FD is the PCH file, though we should return with the file
unmapped. */
static void *
diff --git a/gcc/config/host-netbsd.cc b/gcc/config/host-netbsd.cc
index 690c636..59df05a 100644
--- a/gcc/config/host-netbsd.cc
+++ b/gcc/config/host-netbsd.cc
@@ -62,7 +62,7 @@ netbsd_gt_pch_get_address (size_t size, int fd)
return addr;
}
-/* Map SIZE bytes of FD+OFFSET at BASE. Return 1 if we succeeded at
+/* Map SIZE bytes of FD+OFFSET at BASE. Return 1 if we succeeded at
mapping the data at BASE, -1 if we couldn't. */
static int
diff --git a/gcc/config/host-openbsd.cc b/gcc/config/host-openbsd.cc
index 8a3e75d..9d6c927 100644
--- a/gcc/config/host-openbsd.cc
+++ b/gcc/config/host-openbsd.cc
@@ -62,7 +62,7 @@ openbsd_gt_pch_get_address (size_t size, int fd)
return addr;
}
-/* Map SIZE bytes of FD+OFFSET at BASE. Return 1 if we succeeded at
+/* Map SIZE bytes of FD+OFFSET at BASE. Return 1 if we succeeded at
mapping the data at BASE, -1 if we couldn't. */
static int
diff --git a/gcc/config/host-solaris.cc b/gcc/config/host-solaris.cc
index f8afed7..0e0325b 100644
--- a/gcc/config/host-solaris.cc
+++ b/gcc/config/host-solaris.cc
@@ -39,7 +39,7 @@ mmap_fixed (void *addr, size_t len, int prot, int flags, int fd, off_t off)
void *base;
base = mmap ((caddr_t) addr, len, prot, flags, fd, off);
-
+
if (base != addr)
{
size_t page_size = getpagesize();
@@ -101,7 +101,7 @@ sol_gt_pch_get_address (size_t size, int fd)
return addr;
}
-/* Map SIZE bytes of FD+OFFSET at BASE. Return 1 if we succeeded at
+/* Map SIZE bytes of FD+OFFSET at BASE. Return 1 if we succeeded at
mapping the data at BASE, -1 if we couldn't. */
static int
diff --git a/gcc/config/i386/amxavx512intrin.h b/gcc/config/i386/amxavx512intrin.h
new file mode 100644
index 0000000..146a981
--- /dev/null
+++ b/gcc/config/i386/amxavx512intrin.h
@@ -0,0 +1,189 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxavx512intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXAVX512INTRIN_H_INCLUDED
+#define _AMXAVX512INTRIN_H_INCLUDED
+
+#if !defined(__AMX_AVX512__)
+#pragma GCC push_options
+#pragma GCC target("amx-avx512")
+#define __DISABLE_AMX_AVX512__
+#endif /* __AMX_AVX512__ */
+
+#if defined(__x86_64__)
+#define _tile_cvtrowd2ps_internal(src,A) \
+({ \
+ __m512 dst; \
+ __asm__ volatile \
+ ("{tcvtrowd2ps\t%1, %%tmm"#src", %0|tcvtrowd2ps\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_cvtrowd2psi_internal(src,imm) \
+({ \
+ __m512 dst; \
+ __asm__ volatile \
+ ("{tcvtrowd2ps\t$"#imm", %%tmm"#src", %0|tcvtrowd2ps\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_cvtrowps2pbf16h_internal(src,A) \
+({ \
+ __m512bh dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2pbf16h\t%1, %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_cvtrowps2pbf16hi_internal(src,imm) \
+({ \
+ __m512bh dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2pbf16h\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16h\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_cvtrowps2pbf16l_internal(src,A) \
+({ \
+ __m512bh dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2pbf16l\t%1, %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_cvtrowps2pbf16li_internal(src,imm) \
+({ \
+ __m512bh dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2pbf16l\t$"#imm", %%tmm"#src", %0|tcvtrowps2pbf16l\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_cvtrowps2phh_internal(src,A) \
+({ \
+ __m512h dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2phh\t%1, %%tmm"#src", %0|tcvtrowps2phh\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_cvtrowps2phhi_internal(src,imm) \
+({ \
+ __m512h dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2phh\t$"#imm", %%tmm"#src", %0|tcvtrowps2phh\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_cvtrowps2phl_internal(src,A) \
+({ \
+ __m512h dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2phl\t%1, %%tmm"#src", %0|tcvtrowps2phl\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_cvtrowps2phli_internal(src,imm) \
+({ \
+ __m512h dst; \
+ __asm__ volatile \
+ ("{tcvtrowps2phl\t$"#imm", %%tmm"#src", %0|tcvtrowps2phl\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_movrow_internal(src,A) \
+({ \
+ __m512 dst; \
+ __asm__ volatile \
+ ("{tilemovrow\t%1, %%tmm"#src", %0|tilemovrow\t%0, %%tmm"#src", %1}" \
+ : "=v" (dst) : "r" ((unsigned) (A))); \
+ dst; \
+})
+
+#define _tile_movrowi_internal(src,imm) \
+({ \
+ __m512 dst; \
+ __asm__ volatile \
+ ("{tilemovrow\t$"#imm", %%tmm"#src", %0|tilemovrow\t%0, %%tmm"#src", "#imm"}" \
+ : "=v" (dst) :); \
+ dst; \
+})
+
+#define _tile_cvtrowd2ps(src,A) \
+ _tile_cvtrowd2ps_internal (src,A)
+
+#define _tile_cvtrowd2psi(src,imm) \
+ _tile_cvtrowd2psi_internal (src,imm)
+
+#define _tile_cvtrowps2pbf16h(src,A) \
+ _tile_cvtrowps2pbf16h_internal (src,A)
+
+#define _tile_cvtrowps2pbf16hi(src,imm) \
+ _tile_cvtrowps2pbf16hi_internal (src,imm)
+
+#define _tile_cvtrowps2pbf16l(src,A) \
+ _tile_cvtrowps2pbf16l_internal (src,A)
+
+#define _tile_cvtrowps2pbf16li(src,imm) \
+ _tile_cvtrowps2pbf16li_internal (src,imm)
+
+#define _tile_cvtrowps2phh(src,A) \
+ _tile_cvtrowps2phh_internal (src,A)
+
+#define _tile_cvtrowps2phhi(src,imm) \
+ _tile_cvtrowps2phhi_internal (src,imm)
+
+#define _tile_cvtrowps2phl(src,A) \
+ _tile_cvtrowps2phl_internal (src,A)
+
+#define _tile_cvtrowps2phli(src,imm) \
+ _tile_cvtrowps2phli_internal (src,imm)
+
+#define _tile_movrow(src,A) \
+ _tile_movrow_internal (src,A)
+
+#define _tile_movrowi(src,imm) \
+ _tile_movrowi_internal (src,imm)
+
+#endif
+
+#ifdef __DISABLE_AMX_AVX512__
+#undef __DISABLE_AMX_AVX512__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_AVX512__ */
+
+#endif /* _AMXAVX512INTRIN_H_INCLUDED */
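
By way of illustration (a sketch, not taken from the patch): assuming AMX tile state has already been configured and tile register 0 loaded elsewhere, and that the translation unit is built for x86-64 with amx-avx512 enabled, a row of 32-bit integers in tmm0 can be pulled out as packed float through the new macro like this.

    #include <immintrin.h>

    /* Convert row ROW of tmm0 (32-bit ints) to 16 packed floats.
       The tile number must be a literal constant, since the macro
       stringizes it into the asm template.  */
    __m512
    tile0_row_as_ps (unsigned int row)
    {
      return _tile_cvtrowd2ps (0, row);
    }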
diff --git a/gcc/config/i386/amxfp8intrin.h b/gcc/config/i386/amxfp8intrin.h
new file mode 100644
index 0000000..7e6fca4
--- /dev/null
+++ b/gcc/config/i386/amxfp8intrin.h
@@ -0,0 +1,67 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxfp8intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXFP8INTRIN_H_INCLUDED
+#define _AMXFP8INTRIN_H_INCLUDED
+
+#if defined(__x86_64__)
+#define _tile_dpbf8ps_internal(dst,src1,src2) \
+ __asm__ volatile \
+ ("{tdpbf8ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
+
+#define _tile_dpbhf8ps_internal(dst,src1,src2) \
+ __asm__ volatile \
+ ("{tdpbhf8ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdpbhf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
+
+#define _tile_dphbf8ps_internal(dst,src1,src2) \
+ __asm__ volatile \
+ ("{tdphbf8ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdphbf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
+
+#define _tile_dphf8ps_internal(dst,src1,src2) \
+ __asm__ volatile \
+ ("{tdphf8ps\t%%tmm"#src2", %%tmm"#src1", %%tmm"#dst"|tdphf8ps\t%%tmm"#dst", %%tmm"#src1", %%tmm"#src2"}" ::)
+
+#define _tile_dpbf8ps(dst,src1,src2) \
+ _tile_dpbf8ps_internal (dst,src1,src2)
+
+#define _tile_dpbhf8ps(dst,src1,src2) \
+ _tile_dpbhf8ps_internal (dst,src1,src2)
+
+#define _tile_dphbf8ps(dst,src1,src2) \
+ _tile_dphbf8ps_internal (dst,src1,src2)
+
+#define _tile_dphf8ps(dst,src1,src2) \
+ _tile_dphf8ps_internal (dst,src1,src2)
+
+#endif
+
+#ifdef __DISABLE_AMX_FP8__
+#undef __DISABLE_AMX_FP8__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_FP8__ */
+
+#endif /* _AMXFP8INTRIN_H_INCLUDED */
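
A minimal usage sketch for the FP8 dot-product macros above, assuming tiles 0-2 have been configured and loaded beforehand and that AMX-FP8 support is enabled at compile time (neither step is shown):

    #include <immintrin.h>

    /* tmm0 += tmm1 * tmm2, with bf8 inputs accumulated into f32.
       All three tile numbers must be literal constants.  */
    void
    fp8_accumulate_step (void)
    {
      _tile_dpbf8ps (0, 1, 2);
    }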
diff --git a/gcc/config/i386/amxmovrsintrin.h b/gcc/config/i386/amxmovrsintrin.h
new file mode 100644
index 0000000..47c0868
--- /dev/null
+++ b/gcc/config/i386/amxmovrsintrin.h
@@ -0,0 +1,111 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _IMMINTRIN_H_INCLUDED
+#error "Never use <amxmovrsintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMX_MOVRSINTRIN_H_INCLUDED
+#define _AMX_MOVRSINTRIN_H_INCLUDED
+
+#if defined(__x86_64__)
+
+#if !defined(__AMX_MOVRS__)
+#pragma GCC push_options
+#pragma GCC target("amx-movrs")
+#define __DISABLE_AMX_MOVRS__
+#endif /* __AMX_MOVRS__ */
+
+#define _tile_loaddrs_internal(tdst, base, stride) \
+__asm__ volatile \
+ ("{tileloaddrs\t(%0,%1,1), %%tmm"#tdst \
+ "|tileloaddrs\t%%tmm"#tdst", [%0+%1*1]}" \
+ :: "r" ((const void*) (base)), "r" ((long) (stride)))
+
+#define _tile_loaddrst1_internal(tdst, base, stride) \
+__asm__ volatile \
+ ("{tileloaddrst1\t(%0,%1,1), %%tmm"#tdst \
+ "|tileloaddrst1\t%%tmm"#tdst", [%0+%1*1]}" \
+ :: "r" ((const void*) (base)), "r" ((long) (stride)))
+
+#define _tile_loaddrs(tdst, base, stride) \
+ _tile_loaddrs_internal(tdst, base, stride)
+
+#define _tile_loaddrst1(tdst, base, stride) \
+ _tile_loaddrst1_internal(tdst, base, stride)
+
+#ifdef __DISABLE_AMX_MOVRS__
+#undef __DISABLE_AMX_MOVRS__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_MOVRS__ */
+
+#if !defined(__AMX_MOVRS__) || !defined (__AMX_TRANSPOSE__)
+#pragma GCC push_options
+#pragma GCC target("amx-movrs,amx-transpose")
+#define __DISABLE_AMX_MOVRS_TRANSPOSE__
+#endif /* __AMX_MOVRS_TRANSPOSE__ */
+
+#define _tile_2rpntlvwz0rs_internal(tdst, base, stride) \
+ __asm__ volatile \
+ ("{t2rpntlvwz0rs\t(%0,%1,1), %%tmm"#tdst \
+ "|t2rpntlvwz0rs\t%%tmm"#tdst", [%0+%1*1]}" \
+ :: "r" ((const void*) (base)), "r" ((long) (stride)))
+
+#define _tile_2rpntlvwz0rst1_internal(tdst, base, stride) \
+ __asm__ volatile \
+ ("{t2rpntlvwz0rst1\t(%0,%1,1), %%tmm"#tdst \
+ "|t2rpntlvwz0rst1\t%%tmm"#tdst", [%0+%1*1]}" \
+ :: "r" ((const void*) (base)), "r" ((long) (stride)))
+
+#define _tile_2rpntlvwz1rs_internal(tdst, base, stride) \
+ __asm__ volatile \
+ ("{t2rpntlvwz1rs\t(%0,%1,1), %%tmm"#tdst \
+ "|t2rpntlvwz1rs\t%%tmm"#tdst", [%0+%1*1]}" \
+ :: "r" ((const void*) (base)), "r" ((long) (stride)))
+
+#define _tile_2rpntlvwz1rst1_internal(tdst, base, stride) \
+ __asm__ volatile \
+ ("{t2rpntlvwz1rst1\t(%0,%1,1), %%tmm"#tdst \
+ "|t2rpntlvwz1rst1\t%%tmm"#tdst", [%0+%1*1]}" \
+ :: "r" ((const void*) (base)), "r" ((long) (stride)))
+
+#define _tile_2rpntlvwz0rs(tdst, base, stride) \
+ _tile_2rpntlvwz0rs_internal(tdst, base, stride)
+
+#define _tile_2rpntlvwz0rst1(tdst, base, stride) \
+ _tile_2rpntlvwz0rst1_internal(tdst, base, stride)
+
+#define _tile_2rpntlvwz1rs(tdst, base, stride) \
+ _tile_2rpntlvwz1rs_internal(tdst, base, stride)
+
+#define _tile_2rpntlvwz1rst1(tdst, base, stride) \
+ _tile_2rpntlvwz1rst1_internal(tdst, base, stride)
+
+#ifdef __DISABLE_AMX_MOVRS_TRANSPOSE__
+#undef __DISABLE_AMX_MOVRS_TRANSPOSE__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_MOVRS_TRANSPOSE__ */
+
+#endif /* __x86_64__ */
+
+#endif /* _AMX_MOVRSINTRIN_H_INCLUDED */
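
For illustration only, the read-shared tile load above could be wrapped as follows; this assumes the tile has already been configured, that base and stride are caller-supplied placeholders, and that amx-movrs is enabled when compiling.

    #include <immintrin.h>

    /* Load tile 0 from memory using the read-shared hint form.
       BASE points at the first element, STRIDE is the row pitch
       in bytes; the destination tile number must be a literal.  */
    static inline void
    load_tile0_rs (const void *base, long stride)
    {
      _tile_loaddrs (0, base, stride);
    }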
diff --git a/gcc/config/i386/amxtf32intrin.h b/gcc/config/i386/amxtf32intrin.h
new file mode 100644
index 0000000..450a33e
--- /dev/null
+++ b/gcc/config/i386/amxtf32intrin.h
@@ -0,0 +1,47 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+ This file is part of GCC.
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxtf32intrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXTF32INTRIN_H_INCLUDED
+#define _AMXTF32INTRIN_H_INCLUDED
+
+#if !defined(__AMX_TF32__)
+#pragma GCC push_options
+#pragma GCC target("amx-tf32")
+#define __DISABLE_AMX_TF32__
+#endif /* __AMX_TF32__ */
+
+#if defined(__x86_64__)
+#define _tile_mmultf32ps_internal(src1_dst,src2,src3) \
+ __asm__ volatile\
+ ("{tmmultf32ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tmmultf32ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)
+
+#define _tile_mmultf32ps(src1_dst,src2,src3) \
+ _tile_mmultf32ps_internal (src1_dst, src2, src3)
+
+#endif
+
+#ifdef __DISABLE_AMX_TF32__
+#undef __DISABLE_AMX_TF32__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_TF32__ */
+
+#endif /* _AMXTF32INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/amxtransposeintrin.h b/gcc/config/i386/amxtransposeintrin.h
new file mode 100644
index 0000000..06bdd37
--- /dev/null
+++ b/gcc/config/i386/amxtransposeintrin.h
@@ -0,0 +1,177 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+#error "Never use <amxtransposeintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _AMXTRANSPOSEINTRIN_H_INCLUDED
+#define _AMXTRANSPOSEINTRIN_H_INCLUDED
+
+#if !defined(__AMX_TRANSPOSE__)
+#pragma GCC push_options
+#pragma GCC target("amx-transpose")
+#define __DISABLE_AMX_TRANSPOSE__
+#endif /* __AMX_TRANSPOSE__ */
+
+#if defined(__x86_64__)
+#define _tile_transposed_internal(dst,src) \
+ __asm__ volatile\
+ ("{ttransposed\t%%tmm"#src", %%tmm"#dst"|ttransposed\t%%tmm"#dst", %%tmm"#src"}" ::)
+
+#define _tile_2rpntlvwz0_internal(dst,base,stride) \
+ __asm__ volatile\
+ ("{t2rpntlvwz0\t(%0,%1,1), %%tmm"#dst"|t2rpntlvwz0\t%%tmm"#dst", [%0+%1*1]}" \
+ :: "r" ((const void*) (base)), "r" ((long) (stride)))
+
+#define _tile_2rpntlvwz0t1_internal(dst,base,stride) \
+ __asm__ volatile\
+ ("{t2rpntlvwz0t1\t(%0,%1,1), %%tmm"#dst"|t2rpntlvwz0t1\t%%tmm"#dst", [%0+%1*1]}" \
+ :: "r" ((const void*)(base)), "r" ((long)(stride)))
+
+#define _tile_2rpntlvwz1_internal(dst,base,stride) \
+ __asm__ volatile\
+ ("{t2rpntlvwz1\t(%0,%1,1), %%tmm"#dst"|t2rpntlvwz1\t%%tmm"#dst", [%0+%1*1]}" \
+ :: "r" ((const void*)(base)), "r" ((long)(stride)))
+
+#define _tile_2rpntlvwz1t1_internal(dst,base,stride) \
+ __asm__ volatile\
+ ("{t2rpntlvwz1t1\t(%0,%1,1), %%tmm"#dst"|t2rpntlvwz1t1\t%%tmm"#dst", [%0+%1*1]}" \
+ :: "r" ((const void*)(base)), "r" ((long)(stride)))
+
+#define _tile_transposed(dst,src) \
+ _tile_transposed_internal (dst, src)
+
+#define _tile_2rpntlvwz0(dst,base,stride) \
+ _tile_2rpntlvwz0_internal (dst, base, stride)
+
+#define _tile_2rpntlvwz0t1(dst,base,stride) \
+ _tile_2rpntlvwz0t1_internal (dst, base, stride)
+
+#define _tile_2rpntlvwz1(dst,base,stride) \
+ _tile_2rpntlvwz1_internal (dst, base, stride)
+
+#define _tile_2rpntlvwz1t1(dst,base,stride) \
+ _tile_2rpntlvwz1t1_internal (dst, base, stride)
+
+#if !defined(__AMX_BF16__)
+#pragma GCC push_options
+#pragma GCC target("amx-bf16")
+#define __DISABLE_AMX_BF16__
+#endif /* __AMX_BF16__ */
+
+#define _tile_tdpbf16ps_internal(src1_dst,src2,src3) \
+ __asm__ volatile\
+ ("{ttdpbf16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|ttdpbf16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)
+
+#define _tile_tdpbf16ps(src1_dst,src2,src3) \
+ _tile_tdpbf16ps_internal (src1_dst, src2, src3)
+
+#ifdef __DISABLE_AMX_BF16__
+#undef __DISABLE_AMX_BF16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_BF16__ */
+
+#if !defined(__AMX_FP16__)
+#pragma GCC push_options
+#pragma GCC target("amx-fp16")
+#define __DISABLE_AMX_FP16__
+#endif /* __AMX_FP16__ */
+
+#define _tile_tdpfp16ps_internal(src1_dst,src2,src3) \
+ __asm__ volatile\
+ ("{ttdpfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|ttdpfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)
+
+#define _tile_tdpfp16ps(src1_dst,src2,src3) \
+ _tile_tdpfp16ps_internal (src1_dst, src2, src3)
+
+#ifdef __DISABLE_AMX_FP16__
+#undef __DISABLE_AMX_FP16__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_FP16__ */
+
+#if !defined(__AMX_COMPLEX__)
+#pragma GCC push_options
+#pragma GCC target("amx-complex")
+#define __DISABLE_AMX_COMPLEX__
+#endif /* __AMX_COMPLEX__ */
+
+#define _tile_conjtcmmimfp16ps_internal(src1_dst,src2,src3) \
+ __asm__ volatile\
+ ("{tconjtcmmimfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|tconjtcmmimfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)
+
+#define _tile_conjtfp16_internal(dst,src) \
+ __asm__ volatile\
+ ("{tconjtfp16\t%%tmm"#src", %%tmm"#dst"|tconjtfp16\t%%tmm"#dst", %%tmm"#src"}" ::)
+
+#define _tile_tcmmimfp16ps_internal(src1_dst,src2,src3) \
+ __asm__ volatile\
+ ("{ttcmmimfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|ttcmmimfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)
+
+#define _tile_tcmmrlfp16ps_internal(src1_dst,src2,src3) \
+ __asm__ volatile\
+ ("{ttcmmrlfp16ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|ttcmmrlfp16ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)
+
+#define _tile_conjtcmmimfp16ps(src1_dst,src2,src3) \
+ _tile_conjtcmmimfp16ps_internal (src1_dst, src2, src3)
+
+#define _tile_conjtfp16(dst,src) \
+ _tile_conjtfp16_internal (dst, src)
+
+#define _tile_tcmmimfp16ps(src1_dst,src2,src3) \
+ _tile_tcmmimfp16ps_internal (src1_dst, src2, src3)
+
+#define _tile_tcmmrlfp16ps(src1_dst,src2,src3) \
+ _tile_tcmmrlfp16ps_internal (src1_dst, src2, src3)
+
+#ifdef __DISABLE_AMX_COMPLEX__
+#undef __DISABLE_AMX_COMPLEX__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_COMPLEX__ */
+
+#if !defined(__AMX_TF32__)
+#pragma GCC push_options
+#pragma GCC target("amx-tf32")
+#define __DISABLE_AMX_TF32__
+#endif /* __AMX_TF32__ */
+
+#define _tile_tmmultf32ps_internal(src1_dst,src2,src3) \
+ __asm__ volatile\
+ ("{ttmmultf32ps\t%%tmm"#src3", %%tmm"#src2", %%tmm"#src1_dst"|ttmmultf32ps\t%%tmm"#src1_dst", %%tmm"#src2", %%tmm"#src3"}" ::)
+
+#define _tile_tmmultf32ps(src1_dst,src2,src3) \
+ _tile_tmmultf32ps_internal (src1_dst, src2, src3)
+
+#ifdef __DISABLE_AMX_TF32__
+#undef __DISABLE_AMX_TF32__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_TF32__ */
+
+#endif /* __x86_64__ */
+
+#ifdef __DISABLE_AMX_TRANSPOSE__
+#undef __DISABLE_AMX_TRANSPOSE__
+#pragma GCC pop_options
+#endif /* __DISABLE_AMX_TRANSPOSE__ */
+
+#endif /* _AMXTRANSPOSEINTRIN_H_INCLUDED */
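
A short usage sketch for the transpose form (again assuming tile configuration and loading happen elsewhere and that amx-transpose is enabled at compile time):

    #include <immintrin.h>

    /* Write the transpose of tmm1 into tmm0; both operands are
       tile-register numbers and must be literal constants.  */
    void
    transpose_tile1_into_tile0 (void)
    {
      _tile_transposed (0, 1);
    }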
diff --git a/gcc/config/i386/avx10_2-512convertintrin.h b/gcc/config/i386/avx10_2-512convertintrin.h
index dfbdfc3..178b5ff 100644
--- a/gcc/config/i386/avx10_2-512convertintrin.h
+++ b/gcc/config/i386/avx10_2-512convertintrin.h
@@ -276,7 +276,7 @@ _mm512_cvtne2ph_pbf8 (__m512h __A, __m512h __B)
extern __inline__ __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtne2ph_pbf8 (__m512i __W, __mmask64 __U,
+_mm512_mask_cvtne2ph_pbf8 (__m512i __W, __mmask64 __U,
__m512h __A, __m512h __B)
{
return (__m512i) __builtin_ia32_vcvtne2ph2bf8512_mask ((__v32hf) __A,
@@ -375,7 +375,7 @@ _mm512_cvtnes2ph_phf8 (__m512h __A, __m512h __B)
extern __inline__ __m512i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm512_mask_cvtnes2ph_phf8 (__m512i __W, __mmask64 __U,
+_mm512_mask_cvtnes2ph_phf8 (__m512i __W, __mmask64 __U,
__m512h __A, __m512h __B)
{
return (__m512i) __builtin_ia32_vcvtne2ph2hf8s512_mask ((__v32hf) __A,
diff --git a/gcc/config/i386/avx10_2bf16intrin.h b/gcc/config/i386/avx10_2bf16intrin.h
index f36fb8e..012c704 100644
--- a/gcc/config/i386/avx10_2bf16intrin.h
+++ b/gcc/config/i386/avx10_2bf16intrin.h
@@ -1270,14 +1270,14 @@ _mm_cmp_pbh_mask (__m128bh __A, __m128bh __B, const int __imm)
#else
#define _mm256_mask_cmp_pbh_mask(A, B, C, D) \
((__mmask16) __builtin_ia32_cmppbf16256_mask ((B), (C), (D), (A)))
-
+
#define _mm256_cmp_pbh_mask(A, B, C) \
((__mmask16) __builtin_ia32_cmppbf16256_mask ((A), (B), (C), \
(__mmask16) (-1)))
#define _mm_mask_cmp_pbh_mask(A, B, C, D) \
((__mmask8) __builtin_ia32_cmppbf16128_mask ((B), (C), (D), (A)))
-
+
#define _mm_cmp_pbh_mask(A, B, C) \
((__mmask8) __builtin_ia32_cmppbf16128_mask ((A), (B), (C), \
(__mmask8) (-1)))
diff --git a/gcc/config/i386/avx10_2convertintrin.h b/gcc/config/i386/avx10_2convertintrin.h
index 8d2c1a5..08e34d5 100644
--- a/gcc/config/i386/avx10_2convertintrin.h
+++ b/gcc/config/i386/avx10_2convertintrin.h
@@ -429,7 +429,7 @@ _mm_cvtne2ph_pbf8 (__m128h __A, __m128h __B)
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtne2ph_pbf8 (__m128i __W, __mmask16 __U,
+_mm_mask_cvtne2ph_pbf8 (__m128i __W, __mmask16 __U,
__m128h __A, __m128h __B)
{
return (__m128i) __builtin_ia32_vcvtne2ph2bf8128_mask ((__v8hf) __A,
@@ -462,7 +462,7 @@ _mm256_cvtne2ph_pbf8 (__m256h __A, __m256h __B)
extern __inline__ __m256i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm256_mask_cvtne2ph_pbf8 (__m256i __W, __mmask32 __U,
+_mm256_mask_cvtne2ph_pbf8 (__m256i __W, __mmask32 __U,
__m256h __A, __m256h __B)
{
return (__m256i) __builtin_ia32_vcvtne2ph2bf8256_mask ((__v16hf) __A,
@@ -495,7 +495,7 @@ _mm_cvtnes2ph_pbf8 (__m128h __A, __m128h __B)
extern __inline__ __m128i
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
-_mm_mask_cvtnes2ph_pbf8 (__m128i __W, __mmask16 __U,
+_mm_mask_cvtnes2ph_pbf8 (__m128i __W, __mmask16 __U,
__m128h __A, __m128h __B)
{
return (__m128i) __builtin_ia32_vcvtne2ph2bf8s128_mask ((__v8hf) __A,
diff --git a/gcc/config/i386/avx512dqintrin.h b/gcc/config/i386/avx512dqintrin.h
index d9890c6..a4ab501 100644
--- a/gcc/config/i386/avx512dqintrin.h
+++ b/gcc/config/i386/avx512dqintrin.h
@@ -120,7 +120,7 @@ _cvtmask8_u32 (__mmask8 __A)
{
return (unsigned int) __builtin_ia32_kmovb ((__mmask8 ) __A);
}
-
+
extern __inline __mmask8
__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
_cvtu32_mask8 (unsigned int __A)
diff --git a/gcc/config/i386/avx512fp16intrin.h b/gcc/config/i386/avx512fp16intrin.h
index 1869a92..c3096b7 100644
--- a/gcc/config/i386/avx512fp16intrin.h
+++ b/gcc/config/i386/avx512fp16intrin.h
@@ -3961,11 +3961,11 @@ _mm512_fpclass_ph_mask (__m512h __A, const int __imm)
#else
#define _mm512_mask_fpclass_ph_mask(u, x, c) \
((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
- (int) (c),(__mmask8)(u)))
+ (int) (c),(__mmask32)(u)))
#define _mm512_fpclass_ph_mask(x, c) \
((__mmask32) __builtin_ia32_fpclassph512_mask ((__v32hf) (__m512h) (x), \
- (int) (c),(__mmask8)-1))
+ (int) (c),(__mmask32)-1))
#endif /* __OPIMTIZE__ */
/* Intrinsics vgetexpph. */
diff --git a/gcc/config/i386/biarch64.h b/gcc/config/i386/biarch64.h
index e7c14bc..7cbfb2a 100644
--- a/gcc/config/i386/biarch64.h
+++ b/gcc/config/i386/biarch64.h
@@ -1,6 +1,6 @@
/* Make configure files to produce biarch compiler defaulting to 64bit mode.
This file must be included very first, while the OS specific file later
- to overwrite otherwise wrong defaults.
+ to overwrite otherwise wrong defaults.
Copyright (C) 2001-2024 Free Software Foundation, Inc.
Contributed by Bo Thorsen <bo@suse.de>.
diff --git a/gcc/config/i386/cmpccxaddintrin.h b/gcc/config/i386/cmpccxaddintrin.h
index 39f368f..9349fb0 100644
--- a/gcc/config/i386/cmpccxaddintrin.h
+++ b/gcc/config/i386/cmpccxaddintrin.h
@@ -72,11 +72,11 @@ _cmpccxadd_epi64 (long long *__A, long long __B, long long __C,
}
#else
#define _cmpccxadd_epi32(A,B,C,D) \
- __builtin_ia32_cmpccxadd ((int *) (A), (int) (B), (int) (C), \
+ __builtin_ia32_cmpccxadd ((A), (int) (B), (int) (C), \
(_CMPCCX_ENUM) (D))
#define _cmpccxadd_epi64(A,B,C,D) \
- __builtin_ia32_cmpccxadd64 ((long long *) (A), (long long) (B), \
- (long long) (C), (_CMPCCX_ENUM) (D))
+ __builtin_ia32_cmpccxadd64 ((A), (long long) (B), (long long) (C), \
+ (_CMPCCX_ENUM) (D))
#endif
#ifdef __DISABLE_CMPCCXADD__
diff --git a/gcc/config/i386/cpuid.h b/gcc/config/i386/cpuid.h
index 1e8060e..55351e3 100644
--- a/gcc/config/i386/cpuid.h
+++ b/gcc/config/i386/cpuid.h
@@ -5,16 +5,16 @@
* under the terms of the GNU General Public License as published by the
* Free Software Foundation; either version 3, or (at your option) any
* later version.
- *
+ *
* This file is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
- *
+ *
* Under Section 7 of GPL version 3, you are granted additional
* permissions described in the GCC Runtime Library Exception, version
* 3.1, as published by the Free Software Foundation.
- *
+ *
* You should have received a copy of the GNU General Public License and
* a copy of the GCC Runtime Library Exception along with this program;
* see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
@@ -138,6 +138,7 @@
#define bit_AMX_FP16 (1 << 21)
#define bit_HRESET (1 << 22)
#define bit_AVXIFMA (1 << 23)
+#define bit_MOVRS (1 << 31)
/* %edx */
#define bit_AVXVNNIINT8 (1 << 4)
@@ -162,6 +163,14 @@
#define bit_AESKLE ( 1<<0 )
#define bit_WIDEKL ( 1<<2 )
+/* AMX sub leaf (%eax == 0x1e, %ecx == 1) */
+/* %eax */
+#define bit_AMX_FP8 (1 << 4)
+#define bit_AMX_TRANSPOSE (1 << 5)
+#define bit_AMX_TF32 (1 << 6)
+#define bit_AMX_AVX512 (1 << 7)
+#define bit_AMX_MOVRS (1 << 8)
+
/* AVX10 sub leaf (%eax == 0x24) */
/* %ebx */
#define bit_AVX10_256 (1 << 17)
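
The new AMX sub-leaf bits can be queried with the helpers already provided by <cpuid.h>; a minimal detection sketch, assuming a cpuid.h that carries the bit definitions added above and with error handling reduced to a bail-out, might look like:

    #include <cpuid.h>
    #include <stdio.h>

    int
    main (void)
    {
      unsigned int eax, ebx, ecx, edx;

      /* AMX sub leaf: leaf 0x1e, sub-leaf 1; feature bits in %eax.  */
      if (!__get_cpuid_count (0x1e, 1, &eax, &ebx, &ecx, &edx))
        return 1;   /* leaf not supported */

      printf ("AMX-FP8:       %d\n", (eax & bit_AMX_FP8) != 0);
      printf ("AMX-TRANSPOSE: %d\n", (eax & bit_AMX_TRANSPOSE) != 0);
      printf ("AMX-TF32:      %d\n", (eax & bit_AMX_TF32) != 0);
      printf ("AMX-AVX512:    %d\n", (eax & bit_AMX_AVX512) != 0);
      printf ("AMX-MOVRS:     %d\n", (eax & bit_AMX_MOVRS) != 0);
      return 0;
    }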
diff --git a/gcc/config/i386/cygming.h b/gcc/config/i386/cygming.h
index 9c8c7e3..bd1259f 100644
--- a/gcc/config/i386/cygming.h
+++ b/gcc/config/i386/cygming.h
@@ -192,7 +192,7 @@ along with GCC; see the file COPYING3. If not see
in_section = NULL)
/* Older versions of gas don't handle 'r' as data.
- Explicitly set data flag with 'd'. */
+ Explicitly set data flag with 'd'. */
#define READONLY_DATA_SECTION_ASM_OP "\t.section .rdata,\"dr\""
/* Don't allow flag_pic to propagate since gas may produce invalid code
diff --git a/gcc/config/i386/cygwin.h b/gcc/config/i386/cygwin.h
index 63de10c..795d3a5 100644
--- a/gcc/config/i386/cygwin.h
+++ b/gcc/config/i386/cygwin.h
@@ -137,7 +137,7 @@ along with GCC; see the file COPYING3. If not see
do not use them unnecessarily in gthr-posix.h. */
#define GTHREAD_USE_WEAK 0
-/* Every program on cygwin links against cygwin1.dll which contains
+/* Every program on cygwin links against cygwin1.dll which contains
the pthread routines. There is no need to explicitly link them
and the -pthread flag is accepted only for compatibility. */
#undef GOMP_SELF_SPECS
diff --git a/gcc/config/i386/djgpp.h b/gcc/config/i386/djgpp.h
index 1b5dfb7..e8c3934 100644
--- a/gcc/config/i386/djgpp.h
+++ b/gcc/config/i386/djgpp.h
@@ -98,7 +98,7 @@ along with GCC; see the file COPYING3. If not see
while (0)
#endif
-/* This is how to tell assembler that a symbol is weak */
+/* This is how to tell assembler that a symbol is weak */
#undef ASM_WEAKEN_LABEL
#define ASM_WEAKEN_LABEL(FILE,NAME) \
do { fputs ("\t.weak\t", FILE); assemble_name (FILE, NAME); \
diff --git a/gcc/config/i386/driver-i386.cc b/gcc/config/i386/driver-i386.cc
index 445f564..f0ce017 100644
--- a/gcc/config/i386/driver-i386.cc
+++ b/gcc/config/i386/driver-i386.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -252,7 +253,7 @@ decode_caches_intel (unsigned reg, bool xeon_mp,
/* Detect cache parameters using CPUID function 2. */
static void
-detect_caches_cpuid2 (bool xeon_mp,
+detect_caches_cpuid2 (bool xeon_mp,
struct cache_desc *level1, struct cache_desc *level2)
{
unsigned regs[4];
@@ -295,7 +296,7 @@ detect_caches_cpuid4 (struct cache_desc *level1, struct cache_desc *level2,
int count;
for (count = 0;; count++)
- {
+ {
__cpuid_count(4, count, eax, ebx, ecx, edx);
switch (eax & 0x1f)
{
@@ -688,7 +689,7 @@ const char *host_detect_local_cpu (int argc, const char **argv)
cpu = "haswell";
/* Assume Sandy Bridge. */
else
- cpu = "sandybridge";
+ cpu = "sandybridge";
}
else if (has_feature (FEATURE_SSE4_2))
{
diff --git a/gcc/config/i386/freebsd.h b/gcc/config/i386/freebsd.h
index 583c752..2965048 100644
--- a/gcc/config/i386/freebsd.h
+++ b/gcc/config/i386/freebsd.h
@@ -48,17 +48,17 @@ along with GCC; see the file COPYING3. If not see
#undef SIZE_TYPE
#define SIZE_TYPE (TARGET_64BIT ? "long unsigned int" : "unsigned int")
-
+
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE (TARGET_64BIT ? "long int" : "int")
-
+
#undef WCHAR_TYPE_SIZE
#define WCHAR_TYPE_SIZE (TARGET_64BIT ? 32 : BITS_PER_WORD)
#undef SUBTARGET_EXTRA_SPECS /* i386.h bogusly defines it. */
#define SUBTARGET_EXTRA_SPECS \
{ "fbsd_dynamic_linker", FBSD_DYNAMIC_LINKER }
-
+
/* Use the STARTFILE_SPEC from config/freebsd-spec.h. */
#undef STARTFILE_SPEC
diff --git a/gcc/config/i386/gas.h b/gcc/config/i386/gas.h
index ae77e27..fbf1686 100644
--- a/gcc/config/i386/gas.h
+++ b/gcc/config/i386/gas.h
@@ -50,7 +50,7 @@ along with GCC; see the file COPYING3. If not see
doubt or guess work, and since this file is used for both a.out and other
file formats, we use one of them. */
-#ifdef HAVE_GAS_BALIGN_AND_P2ALIGN
+#ifdef HAVE_GAS_BALIGN_AND_P2ALIGN
#undef ASM_OUTPUT_ALIGN
#define ASM_OUTPUT_ALIGN(FILE,LOG) \
if ((LOG)!=0) fprintf ((FILE), "\t.balign %d\n", 1 << (LOG))
diff --git a/gcc/config/i386/gmm_malloc.h b/gcc/config/i386/gmm_malloc.h
index 7e2ff62..6d4f30c 100644
--- a/gcc/config/i386/gmm_malloc.h
+++ b/gcc/config/i386/gmm_malloc.h
@@ -29,7 +29,7 @@
#include <errno.h>
#endif
-static __inline__ void *
+static __inline__ void *
_mm_malloc (size_t __size, size_t __align)
{
void * __malloc_ptr;
@@ -50,7 +50,7 @@ _mm_malloc (size_t __size, size_t __align)
/* Assume malloc'd pointer is aligned at least to sizeof (void*).
If necessary, add another sizeof (void*) to store the value
returned by malloc. Effectively this enforces a minimum alignment
- of sizeof double. */
+ of sizeof double. */
if (__align < 2 * sizeof (void *))
__align = 2 * sizeof (void *);
@@ -62,7 +62,7 @@ _mm_malloc (size_t __size, size_t __align)
__aligned_ptr = (void *) (((size_t) __malloc_ptr + __align)
& ~((size_t) (__align) - 1));
- /* Store the original pointer just before p. */
+ /* Store the original pointer just before p. */
((void **) __aligned_ptr)[-1] = __malloc_ptr;
return __aligned_ptr;
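
To make the rounding in _mm_malloc concrete, here is a tiny worked example of the same expression with made-up numbers (0x1008 as a hypothetical malloc result, 64-byte alignment): bumping by the full alignment before masking guarantees room below the result to stash the raw pointer.

    #include <stdio.h>
    #include <stddef.h>

    int
    main (void)
    {
      size_t malloc_ptr = 0x1008;   /* hypothetical malloc result */
      size_t align = 64;            /* requested alignment */

      /* Same rounding as _mm_malloc: add the alignment, mask down;
         the slot just below the result holds the raw pointer.  */
      size_t aligned = (malloc_ptr + align) & ~(align - 1);

      printf ("aligned = %#zx, saved-pointer slot = %#zx\n",
              aligned, aligned - sizeof (void *));   /* 0x1040, 0x1038 */
      return 0;
    }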
diff --git a/gcc/config/i386/gnu-user.h b/gcc/config/i386/gnu-user.h
index da6f64b..a7673eb 100644
--- a/gcc/config/i386/gnu-user.h
+++ b/gcc/config/i386/gnu-user.h
@@ -41,16 +41,16 @@ along with GCC; see the file COPYING3. If not see
#undef SIZE_TYPE
#define SIZE_TYPE "unsigned int"
-
+
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE "int"
-
+
#undef WCHAR_TYPE
#define WCHAR_TYPE "long int"
-
+
#undef WCHAR_TYPE_SIZE
#define WCHAR_TYPE_SIZE BITS_PER_WORD
-
+
/* Provide a LINK_SPEC appropriate for GNU userspace. Here we provide support
for the special GCC options -static and -shared, which allow us to
link things in one of these three modes by applying the appropriate
diff --git a/gcc/config/i386/host-cygwin.cc b/gcc/config/i386/host-cygwin.cc
index c72999c..a693d5b 100644
--- a/gcc/config/i386/host-cygwin.cc
+++ b/gcc/config/i386/host-cygwin.cc
@@ -59,7 +59,7 @@ cygwin_gt_pch_get_address (size_t sz, int fd)
/* Cygwin requires that the underlying file be at least
as large as the requested mapping. */
if ((size_t) p < sz)
- {
+ {
if (ftruncate (fd, sz) == -1)
fatal_error (input_location, "cannot extend PCH file: %m");
}
diff --git a/gcc/config/i386/host-mingw32.cc b/gcc/config/i386/host-mingw32.cc
index 4256398..b010be7 100644
--- a/gcc/config/i386/host-mingw32.cc
+++ b/gcc/config/i386/host-mingw32.cc
@@ -47,7 +47,7 @@ static inline void w32_error(const char*, const char*, int, const char*);
/* Granularity for reserving address space. */
static size_t va_granularity = 0x10000;
-/* Print out the GetLastError() translation. */
+/* Print out the GetLastError() translation. */
static inline void
w32_error (const char* function, const char* file, int line,
const char* my_msg)
@@ -93,7 +93,7 @@ mingw32_gt_pch_get_address (size_t size, int)
for NT system dlls is in 0x70000000 to 0x78000000 range.
If we allocate at bottom we need to reserve the address as early
as possible and at the same point in each invocation. */
-
+
res = VirtualAlloc (NULL, size,
MEM_RESERVE | MEM_TOP_DOWN,
PAGE_NOACCESS);
@@ -103,11 +103,11 @@ mingw32_gt_pch_get_address (size_t size, int)
/* We do not need the address space for now, so free it. */
VirtualFree (res, 0, MEM_RELEASE);
- return res;
+ return res;
}
/* ADDR is an address returned by gt_pch_get_address. Attempt to allocate
- SIZE bytes at the same address and load it with the data from FD at
+ SIZE bytes at the same address and load it with the data from FD at
OFFSET. Return -1 if we couldn't allocate memory at ADDR, return 0
if the memory is allocated but the data not loaded, return 1 if done. */
@@ -117,10 +117,10 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd,
{
void * mmap_addr;
HANDLE mmap_handle;
-
+
/* Apparently, MS Vista puts unnamed file mapping objects into Global
namespace when running an application in a Terminal Server
- session. This causes failure since, by default, applications
+ session. This causes failure since, by default, applications
don't get SeCreateGlobalPrivilege. We don't need global
memory sharing so explicitly put object into Local namespace.
@@ -140,10 +140,10 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd,
version_info.dwOSVersionInfoSize = sizeof (version_info);
if (size == 0)
- return 0;
+ return 0;
/* Offset must be also be a multiple of allocation granularity for
- this to work. We can't change the offset. */
+ this to work. We can't change the offset. */
if ((offset & (va_granularity - 1)) != 0)
return -1;
@@ -166,7 +166,7 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd,
if (mmap_handle == NULL)
{
w32_error (__FUNCTION__, __FILE__, __LINE__, "CreateFileMapping");
- return -1;
+ return -1;
}
/* Retry five times, as here might occure a race with multiple gcc's
@@ -180,7 +180,7 @@ mingw32_gt_pch_use_address (void *&addr, size_t size, int fd,
if (r != 4)
Sleep (500);
}
-
+
if (mmap_addr != addr)
{
w32_error (__FUNCTION__, __FILE__, __LINE__, "MapViewOfFileEx");
diff --git a/gcc/config/i386/i386-builtin-types.def b/gcc/config/i386/i386-builtin-types.def
index 290f6e6..fff29b8 100644
--- a/gcc/config/i386/i386-builtin-types.def
+++ b/gcc/config/i386/i386-builtin-types.def
@@ -1507,3 +1507,12 @@ DEF_FUNCTION_TYPE (V8DF, V8DF, V8DF, INT, V8DF, UQI, INT)
DEF_FUNCTION_TYPE (V32HF, V32HF, V32HF, INT, V32HF, USI, INT)
DEF_FUNCTION_TYPE (V16HF, V16HF, V16HF, INT, V16HF, UHI, INT)
DEF_FUNCTION_TYPE (V16SF, V16SF, V16SF, INT, V16SF, UHI, INT)
+
+# SM4 builtins
+DEF_FUNCTION_TYPE (V16SI, V16SI, V16SI)
+
+# MOVRS builtins
+DEF_FUNCTION_TYPE (CHAR, PCCHAR)
+DEF_FUNCTION_TYPE (SHORT, PCSHORT)
+DEF_FUNCTION_TYPE (INT, PCINT)
+DEF_FUNCTION_TYPE (INT64, PCINT64)
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 151ccf4..c484e6d 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -505,6 +505,24 @@ BDESC (0, OPTION_MASK_ISA2_WIDEKL, CODE_FOR_nothing, "__builtin_ia32_aesencwide2
BDESC (0, OPTION_MASK_ISA2_PREFETCHI, CODE_FOR_prefetchi, "__builtin_ia32_prefetchi", IX86_BUILTIN_PREFETCHI, UNKNOWN, (int) VOID_FTYPE_PCVOID_INT)
BDESC (0, 0, CODE_FOR_nothing, "__builtin_ia32_prefetch", IX86_BUILTIN_PREFETCH, UNKNOWN, (int) VOID_FTYPE_PCVOID_INT_INT_INT)
+/* MOVRS */
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrsqi, "__builtin_ia32_movrsqi", IX86_BUILTIN_MOVRSQI, UNKNOWN, (int) CHAR_FTYPE_PCCHAR)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrshi, "__builtin_ia32_movrshi", IX86_BUILTIN_MOVRSHI, UNKNOWN, (int) SHORT_FTYPE_PCSHORT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrssi, "__builtin_ia32_movrssi", IX86_BUILTIN_MOVRSSI, UNKNOWN, (int) INT_FTYPE_PCINT)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS, CODE_FOR_movrsdi, "__builtin_ia32_movrsdi", IX86_BUILTIN_MOVRSDI, UNKNOWN, (int) INT64_FTYPE_PCINT64)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrsbv64qi_mask, "__builtin_ia32_vmovrsb512_mask", IX86_BUILTIN_VMOVRSB_512, UNKNOWN, (int) V64QI_FTYPE_PCV64QI_V64QI_UDI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrsdv16si_mask, "__builtin_ia32_vmovrsd512_mask", IX86_BUILTIN_VMOVRSD_512, UNKNOWN, (int) V16SI_FTYPE_PCV16SI_V16SI_UHI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrsqv8di_mask, "__builtin_ia32_vmovrsq512_mask", IX86_BUILTIN_VMOVRSQ_512, UNKNOWN, (int) V8DI_FTYPE_PCV8DI_V8DI_UQI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_avx10_2_vmovrswv32hi_mask, "__builtin_ia32_vmovrsw512_mask", IX86_BUILTIN_VMOVRSW_512, UNKNOWN, (int) V32HI_FTYPE_PCV32HI_V32HI_USI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsbv32qi_mask, "__builtin_ia32_vmovrsb256_mask", IX86_BUILTIN_VMOVRSB_256, UNKNOWN, (int) V32QI_FTYPE_PCV32QI_V32QI_USI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsdv8si_mask, "__builtin_ia32_vmovrsd256_mask", IX86_BUILTIN_VMOVRSD_256, UNKNOWN, (int) V8SI_FTYPE_PCV8SI_V8SI_UQI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsqv4di_mask, "__builtin_ia32_vmovrsq256_mask", IX86_BUILTIN_VMOVRSQ_256, UNKNOWN, (int) V4DI_FTYPE_PCV4DI_V4DI_UQI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrswv16hi_mask, "__builtin_ia32_vmovrsw256_mask", IX86_BUILTIN_VMOVRSW_256, UNKNOWN, (int) V16HI_FTYPE_PCV16HI_V16HI_UHI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsbv16qi_mask, "__builtin_ia32_vmovrsb128_mask", IX86_BUILTIN_VMOVRSB_128, UNKNOWN, (int) V16QI_FTYPE_PCV16QI_V16QI_UHI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsdv4si_mask, "__builtin_ia32_vmovrsd128_mask", IX86_BUILTIN_VMOVRSD_128, UNKNOWN, (int) V4SI_FTYPE_PCV4SI_V4SI_UQI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrsqv2di_mask, "__builtin_ia32_vmovrsq128_mask", IX86_BUILTIN_VMOVRSQ_128, UNKNOWN, (int) V2DI_FTYPE_PCV2DI_V2DI_UQI)
+BDESC (OPTION_MASK_ISA_64BIT, OPTION_MASK_ISA2_MOVRS | OPTION_MASK_ISA2_AVX10_2_256, CODE_FOR_avx10_2_vmovrswv8hi_mask, "__builtin_ia32_vmovrsw128_mask", IX86_BUILTIN_VMOVRSW_128, UNKNOWN, (int) V8HI_FTYPE_PCV8HI_V8HI_UQI)
+
BDESC_END (SPECIAL_ARGS, PURE_ARGS)
/* AVX */
@@ -1668,8 +1686,10 @@ BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_SM3, CODE_FOR_vsm3rnds2, "__builtin
/* SM4 */
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v4si, "__builtin_ia32_vsm4key4128", IX86_BUILTIN_VSM4KEY4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4key4_v8si, "__builtin_ia32_vsm4key4256", IX86_BUILTIN_VSM4KEY4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vsm4key4_v16si, "__builtin_ia32_vsm4key4512", IX86_BUILTIN_VSM4KEY4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI)
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v4si, "__builtin_ia32_vsm4rnds4128", IX86_BUILTIN_VSM4RNDS4128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
BDESC (0, OPTION_MASK_ISA2_SM4, CODE_FOR_vsm4rnds4_v8si, "__builtin_ia32_vsm4rnds4256", IX86_BUILTIN_VSM4RNDS4256, UNKNOWN, (int) V8SI_FTYPE_V8SI_V8SI)
+BDESC (0, OPTION_MASK_ISA2_SM4 | OPTION_MASK_ISA2_AVX10_2_512, CODE_FOR_vsm4rnds4_v16si, "__builtin_ia32_vsm4rnds4512", IX86_BUILTIN_VSM4RNDS4512, UNKNOWN, (int) V16SI_FTYPE_V16SI_V16SI)
/* SHA512 */
BDESC (0, OPTION_MASK_ISA2_SHA512, CODE_FOR_vsha512msg1, "__builtin_ia32_vsha512msg1", IX86_BUILTIN_VSHA512MSG1, UNKNOWN, (int) V4DI_FTYPE_V4DI_V2DI)
@@ -2819,17 +2839,17 @@ BDESC (0, OPTION_MASK_ISA2_RDPID, CODE_FOR_rdpid, "__builtin_ia32_rdpid", IX86_B
/* VAES. */
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v16qi, "__builtin_ia32_vaesdec_v16qi", IX86_BUILTIN_VAESDEC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
-BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (0, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdec_v32qi, "__builtin_ia32_vaesdec_v32qi", IX86_BUILTIN_VAESDEC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdec_v64qi, "__builtin_ia32_vaesdec_v64qi", IX86_BUILTIN_VAESDEC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v16qi, "__builtin_ia32_vaesdeclast_v16qi", IX86_BUILTIN_VAESDECLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
-BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (0, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesdeclast_v32qi, "__builtin_ia32_vaesdeclast_v32qi", IX86_BUILTIN_VAESDECLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesdeclast_v64qi, "__builtin_ia32_vaesdeclast_v64qi", IX86_BUILTIN_VAESDECLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v16qi, "__builtin_ia32_vaesenc_v16qi", IX86_BUILTIN_VAESENC16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
-BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (0, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenc_v32qi, "__builtin_ia32_vaesenc_v32qi", IX86_BUILTIN_VAESENC32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenc_v64qi, "__builtin_ia32_vaesenc_v64qi", IX86_BUILTIN_VAESENC64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
BDESC (OPTION_MASK_ISA_AVX512VL, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v16qi, "__builtin_ia32_vaesenclast_v16qi", IX86_BUILTIN_VAESENCLAST16, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
-BDESC (0, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
-BDESC (0, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
+BDESC (OPTION_MASK_ISA_AVX, OPTION_MASK_ISA2_VAES, CODE_FOR_vaesenclast_v32qi, "__builtin_ia32_vaesenclast_v32qi", IX86_BUILTIN_VAESENCLAST32, UNKNOWN, (int) V32QI_FTYPE_V32QI_V32QI)
+BDESC (OPTION_MASK_ISA_AVX512F, OPTION_MASK_ISA2_VAES | OPTION_MASK_ISA2_EVEX512, CODE_FOR_vaesenclast_v64qi, "__builtin_ia32_vaesenclast_v64qi", IX86_BUILTIN_VAESENCLAST64, UNKNOWN, (int) V64QI_FTYPE_V64QI_V64QI)
/* BF16 */
BDESC (0, OPTION_MASK_ISA2_AVX512BF16 | OPTION_MASK_ISA2_EVEX512, CODE_FOR_avx512f_cvtne2ps2bf16_v32bf, "__builtin_ia32_cvtne2ps2bf16_v32bf", IX86_BUILTIN_CVTNE2PS2BF16_V32BF, UNKNOWN, (int) V32BF_FTYPE_V16SF_V16SF)
diff --git a/gcc/config/i386/i386-builtins.cc b/gcc/config/i386/i386-builtins.cc
index 4286eeb..e12e758 100644
--- a/gcc/config/i386/i386-builtins.cc
+++ b/gcc/config/i386/i386-builtins.cc
@@ -18,6 +18,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -1849,7 +1850,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
target_node
= ix86_valid_target_attribute_tree (decl, attrs, &global_options,
&global_options_set, 0);
-
+
gcc_assert (target_node);
if (target_node == error_mark_node)
return 0;
@@ -1932,14 +1933,14 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
cl_target_option_restore (&global_options, &global_options_set,
&cur_target);
-
+
if (predicate_list && arg_str == NULL)
{
error_at (DECL_SOURCE_LOCATION (decl),
"no dispatcher found for the versioning attributes");
return 0;
}
-
+
if (predicate_list)
{
predicate_decl = ix86_builtins [(int) builtin_fn];
@@ -2007,7 +2008,7 @@ get_builtin_code_for_version (tree decl, tree *predicate_list)
*predicate_list = predicate_chain;
}
- return priority;
+ return priority;
}
/* This builds the processor_model struct type defined in
diff --git a/gcc/config/i386/i386-c.cc b/gcc/config/i386/i386-c.cc
index 72435fe..da60da4 100644
--- a/gcc/config/i386/i386-c.cc
+++ b/gcc/config/i386/i386-c.cc
@@ -741,6 +741,18 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__AVX10_2_256__");
if (isa_flag2 & OPTION_MASK_ISA2_AVX10_2_512)
def_or_undef (parse_in, "__AVX10_2_512__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_AVX512)
+ def_or_undef (parse_in, "__AMX_AVX512__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_TF32)
+ def_or_undef (parse_in, "__AMX_TF32__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_TRANSPOSE)
+ def_or_undef (parse_in, "__AMX_TRANSPOSE__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_FP8)
+ def_or_undef (parse_in, "__AMX_FP8__");
+ if (isa_flag2 & OPTION_MASK_ISA2_MOVRS)
+ def_or_undef (parse_in, "__MOVRS__");
+ if (isa_flag2 & OPTION_MASK_ISA2_AMX_MOVRS)
+ def_or_undef (parse_in, "__AMX_MOVRS__");
if (TARGET_IAMCU)
{
def_or_undef (parse_in, "__iamcu");
diff --git a/gcc/config/i386/i386-expand.cc b/gcc/config/i386/i386-expand.cc
index d692008..515334a 100644
--- a/gcc/config/i386/i386-expand.cc
+++ b/gcc/config/i386/i386-expand.cc
@@ -18,6 +18,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -1009,7 +1010,7 @@ ix86_expand_vector_move_misalign (machine_mode mode, rtx operands[])
t = gen_reg_rtx (V4SFmode);
else
t = op0;
-
+
if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
emit_move_insn (t, CONST0_RTX (V4SFmode));
else
@@ -1684,7 +1685,7 @@ ix86_emit_binop (enum rtx_code code, machine_mode mode,
op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, mode, dst, src));
clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
-
+
emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
}
@@ -2916,6 +2917,11 @@ ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1)
switch (ix86_fp_comparison_strategy (code))
{
case IX86_FPCMP_COMI:
+ tmp = gen_rtx_COMPARE (CCFPmode, op0, op1);
+ if (TARGET_AVX10_2_256 && (code == EQ || code == NE))
+ tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_OPTCOMX);
+ if (unordered_compare)
+ tmp = gen_rtx_UNSPEC (CCFPmode, gen_rtvec (1, tmp), UNSPEC_NOTRAP);
cmp_mode = CCFPmode;
emit_insn (gen_rtx_SET (gen_rtx_REG (CCFPmode, FLAGS_REG), tmp));
break;
@@ -3090,6 +3096,8 @@ ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
&& GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0))) == 16)
{
tmp = SUBREG_REG (op0);
+ if (GET_MODE (tmp) == V8HFmode || GET_MODE (tmp) == V8BFmode)
+ tmp = gen_lowpart (V8HImode, tmp);
tmp = gen_rtx_UNSPEC (CCZmode, gen_rtvec (2, tmp, tmp), UNSPEC_PTEST);
}
else
@@ -3139,12 +3147,17 @@ ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
dest = op0 == op1 ? 0 : op0 < op1 ? -1 : op0 > op1 ? 1 : 2. */
void
-ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1)
+ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1, rtx op2)
{
gcc_checking_assert (ix86_fp_comparison_strategy (GT) != IX86_FPCMP_ARITH);
+ rtx zero = NULL_RTX;
+ if (op2 != const0_rtx
+ && (TARGET_IEEE_FP || TARGET_ZERO_EXTEND_WITH_AND)
+ && GET_MODE (dest) == SImode)
+ zero = force_reg (SImode, const0_rtx);
rtx gt = ix86_expand_fp_compare (GT, op0, op1);
- rtx l0 = gen_label_rtx ();
- rtx l1 = gen_label_rtx ();
+ rtx l0 = op2 == const0_rtx ? gen_label_rtx () : NULL_RTX;
+ rtx l1 = op2 == const0_rtx ? gen_label_rtx () : NULL_RTX;
rtx l2 = TARGET_IEEE_FP ? gen_label_rtx () : NULL_RTX;
rtx lend = gen_label_rtx ();
rtx tmp;
@@ -3158,32 +3171,185 @@ ix86_expand_fp_spaceship (rtx dest, rtx op0, rtx op1)
jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
add_reg_br_prob_note (jmp, profile_probability::very_unlikely ());
}
- rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
- gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
- gen_rtx_LABEL_REF (VOIDmode, l0), pc_rtx);
- jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
- add_reg_br_prob_note (jmp, profile_probability::unlikely ());
- tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, gt,
- gen_rtx_LABEL_REF (VOIDmode, l1), pc_rtx);
- jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
- add_reg_br_prob_note (jmp, profile_probability::even ());
- emit_move_insn (dest, constm1_rtx);
- emit_jump (lend);
- emit_label (l0);
- emit_move_insn (dest, const0_rtx);
- emit_jump (lend);
- emit_label (l1);
- emit_move_insn (dest, const1_rtx);
+ if (op2 == const0_rtx)
+ {
+ rtx eq = gen_rtx_fmt_ee (UNEQ, VOIDmode,
+ gen_rtx_REG (CCFPmode, FLAGS_REG), const0_rtx);
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, eq,
+ gen_rtx_LABEL_REF (VOIDmode, l0), pc_rtx);
+ jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+ add_reg_br_prob_note (jmp, profile_probability::unlikely ());
+ tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, gt,
+ gen_rtx_LABEL_REF (VOIDmode, l1), pc_rtx);
+ jmp = emit_jump_insn (gen_rtx_SET (pc_rtx, tmp));
+ add_reg_br_prob_note (jmp, profile_probability::even ());
+ emit_move_insn (dest, constm1_rtx);
+ emit_jump (lend);
+ emit_label (l0);
+ emit_move_insn (dest, const0_rtx);
+ emit_jump (lend);
+ emit_label (l1);
+ emit_move_insn (dest, const1_rtx);
+ }
+ else
+ {
+ rtx lt_tmp = NULL_RTX;
+ if (GET_MODE (dest) != SImode || !TARGET_ZERO_EXTEND_WITH_AND)
+ {
+ lt_tmp = gen_reg_rtx (QImode);
+ ix86_expand_setcc (lt_tmp, UNLT, gen_rtx_REG (CCFPmode, FLAGS_REG),
+ const0_rtx);
+ if (GET_MODE (dest) != QImode)
+ {
+ tmp = gen_reg_rtx (GET_MODE (dest));
+ emit_insn (gen_rtx_SET (tmp,
+ gen_rtx_ZERO_EXTEND (GET_MODE (dest),
+ lt_tmp)));
+ lt_tmp = tmp;
+ }
+ }
+ rtx gt_tmp;
+ if (zero)
+ {
+ /* If TARGET_IEEE_FP and dest has SImode, emit SImode clear
+ before the floating point comparison and use setcc_si_slp
+ pattern to hide it from the combiner, so that it doesn't
+ undo it. Similarly for TARGET_ZERO_EXTEND_WITH_AND, where
+ the ZERO_EXTEND normally emitted would need to be AND
+ with flags clobber. */
+ tmp = ix86_expand_compare (GT, XEXP (gt, 0), const0_rtx);
+ PUT_MODE (tmp, QImode);
+ emit_insn (gen_setcc_si_slp (zero, tmp, zero));
+ gt_tmp = zero;
+ }
+ else
+ {
+ gt_tmp = gen_reg_rtx (QImode);
+ ix86_expand_setcc (gt_tmp, GT, XEXP (gt, 0), const0_rtx);
+ if (GET_MODE (dest) != QImode)
+ {
+ tmp = gen_reg_rtx (GET_MODE (dest));
+ emit_insn (gen_rtx_SET (tmp,
+ gen_rtx_ZERO_EXTEND (GET_MODE (dest),
+ gt_tmp)));
+ gt_tmp = tmp;
+ }
+ }
+ if (lt_tmp)
+ {
+ tmp = expand_simple_binop (GET_MODE (dest), MINUS, gt_tmp, lt_tmp,
+ dest, 0, OPTAB_DIRECT);
+ if (!rtx_equal_p (tmp, dest))
+ emit_move_insn (dest, tmp);
+ }
+ else
+ {
+ /* For TARGET_ZERO_EXTEND_WITH_AND emit sbb directly, as we can't
+ do ZERO_EXTEND without clobbering flags. */
+ tmp = ix86_expand_compare (UNLT, XEXP (gt, 0), const0_rtx);
+ PUT_MODE (tmp, SImode);
+ emit_insn (gen_subsi3_carry (dest, gt_tmp,
+ force_reg (GET_MODE (dest), const0_rtx),
+ XEXP (gt, 0), tmp));
+ }
+ }
emit_jump (lend);
if (l2)
{
emit_label (l2);
- emit_move_insn (dest, const2_rtx);
+ emit_move_insn (dest, op2 == const0_rtx ? const2_rtx : op2);
}
emit_label (lend);
}
+/* Expand integral op0 <=> op1, i.e.
+ dest = op0 == op1 ? 0 : op0 < op1 ? -1 : 1. */
+
+void
+ix86_expand_int_spaceship (rtx dest, rtx op0, rtx op1, rtx op2)
+{
+ gcc_assert (INTVAL (op2));
+ rtx zero1 = NULL_RTX, zero2 = NULL_RTX;
+ if (TARGET_ZERO_EXTEND_WITH_AND && GET_MODE (dest) == SImode)
+ {
+ zero1 = force_reg (SImode, const0_rtx);
+ if (INTVAL (op2) != 1)
+ zero2 = force_reg (SImode, const0_rtx);
+ }
+
+ /* Not using ix86_expand_int_compare here, so that it doesn't swap
+ operands nor optimize CC mode - we need a mode usable for both
+ LT and GT resp. LTU and GTU comparisons with the same unswapped
+ operands. */
+ rtx flags = gen_rtx_REG (INTVAL (op2) != 1 ? CCGCmode : CCmode, FLAGS_REG);
+ rtx tmp = gen_rtx_COMPARE (GET_MODE (flags), op0, op1);
+ emit_insn (gen_rtx_SET (flags, tmp));
+ rtx lt_tmp = NULL_RTX;
+ if (zero2)
+ {
+ /* For TARGET_ZERO_EXTEND_WITH_AND, emit setcc_si_slp to avoid
+ ZERO_EXTEND. */
+ tmp = ix86_expand_compare (LT, flags, const0_rtx);
+ PUT_MODE (tmp, QImode);
+ emit_insn (gen_setcc_si_slp (zero2, tmp, zero2));
+ lt_tmp = zero2;
+ }
+ else if (!zero1)
+ {
+ lt_tmp = gen_reg_rtx (QImode);
+ ix86_expand_setcc (lt_tmp, INTVAL (op2) != 1 ? LT : LTU, flags,
+ const0_rtx);
+ if (GET_MODE (dest) != QImode)
+ {
+ tmp = gen_reg_rtx (GET_MODE (dest));
+ emit_insn (gen_rtx_SET (tmp, gen_rtx_ZERO_EXTEND (GET_MODE (dest),
+ lt_tmp)));
+ lt_tmp = tmp;
+ }
+ }
+ rtx gt_tmp;
+ if (zero1)
+ {
+ /* For TARGET_ZERO_EXTEND_WITH_AND, emit setcc_si_slp to avoid
+ ZERO_EXTEND. */
+ tmp = ix86_expand_compare (INTVAL (op2) != 1 ? GT : GTU, flags,
+ const0_rtx);
+ PUT_MODE (tmp, QImode);
+ emit_insn (gen_setcc_si_slp (zero1, tmp, zero1));
+ gt_tmp = zero1;
+ }
+ else
+ {
+ gt_tmp = gen_reg_rtx (QImode);
+ ix86_expand_setcc (gt_tmp, INTVAL (op2) != 1 ? GT : GTU, flags,
+ const0_rtx);
+ if (GET_MODE (dest) != QImode)
+ {
+ tmp = gen_reg_rtx (GET_MODE (dest));
+ emit_insn (gen_rtx_SET (tmp, gen_rtx_ZERO_EXTEND (GET_MODE (dest),
+ gt_tmp)));
+ gt_tmp = tmp;
+ }
+ }
+ if (lt_tmp)
+ {
+ tmp = expand_simple_binop (GET_MODE (dest), MINUS, gt_tmp, lt_tmp, dest,
+ 0, OPTAB_DIRECT);
+ if (!rtx_equal_p (tmp, dest))
+ emit_move_insn (dest, tmp);
+ }
+ else
+ {
+ /* For TARGET_ZERO_EXTEND_WITH_AND emit sbb directly, as we can't
+ do ZERO_EXTEND without clobbering flags. */
+ tmp = ix86_expand_compare (LTU, flags, const0_rtx);
+ PUT_MODE (tmp, SImode);
+ emit_insn (gen_subsi3_carry (dest, gt_tmp,
+ force_reg (GET_MODE (dest), const0_rtx),
+ flags, tmp));
+ }
+}
+
/* Expand comparison setting or clearing carry flag. Return true when
successful and set pop for the operation. */
static bool
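
Both spaceship expanders above ultimately reduce <=> to two setcc results
subtracted from each other, with the FP variant routing unordered operands to
op2. A scalar C sketch of the idiom, for orientation only (the expanders emit
RTL directly and handle the QImode/SImode and TARGET_ZERO_EXTEND_WITH_AND
details not shown here):

    /* Illustrative sketch of the setcc/setcc/sub idiom.  */
    int
    int_spaceship (long op0, long op1)
    {
      int gt = op0 > op1;   /* setg (or seta for the unsigned form)  */
      int lt = op0 < op1;   /* setl (or setb)  */
      return gt - lt;       /* sub: 1, 0 or -1  */
    }

    int
    fp_spaceship (double op0, double op1, int unordered_val)
    {
      if (op0 != op0 || op1 != op1)   /* any NaN operand -> unordered  */
        return unordered_val;         /* the role played by op2 above  */
      return (op0 > op1) - (op0 < op1);
    }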
@@ -4031,6 +4197,8 @@ ix86_use_mask_cmp_p (machine_mode mode, machine_mode cmp_mode,
return true;
else if (GET_MODE_INNER (cmp_mode) == HFmode)
return true;
+ else if (GET_MODE_INNER (cmp_mode) == BFmode)
+ return true;
/* When op_true is NULL, op_false must be NULL, or vice versa. */
gcc_assert (!op_true == !op_false);
@@ -4247,23 +4415,23 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
switch (mode)
{
case E_V2SFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_mmx_blendvps;
break;
case E_V4SFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvps;
break;
case E_V2DFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvpd;
break;
case E_SFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvss;
break;
case E_DFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_sse4_1_blendvsd;
break;
case E_V8QImode:
@@ -4271,7 +4439,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
case E_V4HFmode:
case E_V4BFmode:
case E_V2SImode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
{
gen = gen_mmx_pblendvb_v8qi;
blend_mode = V8QImode;
@@ -4281,14 +4449,14 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
case E_V2HImode:
case E_V2HFmode:
case E_V2BFmode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
{
gen = gen_mmx_pblendvb_v4qi;
blend_mode = V4QImode;
}
break;
case E_V2QImode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
gen = gen_mmx_pblendvb_v2qi;
break;
case E_V16QImode:
@@ -4298,18 +4466,18 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
case E_V4SImode:
case E_V2DImode:
case E_V1TImode:
- if (TARGET_SSE4_1)
+ if (TARGET_SSE_MOVCC_USE_BLENDV && TARGET_SSE4_1)
{
gen = gen_sse4_1_pblendvb;
blend_mode = V16QImode;
}
break;
case E_V8SFmode:
- if (TARGET_AVX)
+ if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV)
gen = gen_avx_blendvps256;
break;
case E_V4DFmode:
- if (TARGET_AVX)
+ if (TARGET_AVX && TARGET_SSE_MOVCC_USE_BLENDV)
gen = gen_avx_blendvpd256;
break;
case E_V32QImode:
@@ -4318,7 +4486,7 @@ ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
case E_V16BFmode:
case E_V8SImode:
case E_V4DImode:
- if (TARGET_AVX2)
+ if (TARGET_AVX2 && TARGET_SSE_MOVCC_USE_BLENDV)
{
gen = gen_avx2_pblendvb;
blend_mode = V32QImode;
@@ -5590,7 +5758,7 @@ ix86_expand_vec_perm (rtx operands[])
if (TARGET_XOP)
{
- /* The XOP VPPERM insn supports three inputs. By ignoring the
+ /* The XOP VPPERM insn supports three inputs. By ignoring the
one_operand_shuffle special case, we avoid creating another
set of constant vectors in memory. */
one_operand_shuffle = false;
@@ -7418,7 +7586,7 @@ ix86_expand_v1ti_ashiftrt (rtx operands[])
rtx tmp7 = force_reg (V1TImode, gen_lowpart (V1TImode, tmp3));
rtx tmp8 = gen_reg_rtx (V1TImode);
emit_insn (gen_sse2_ashlv1ti3 (tmp8, tmp7, GEN_INT (64)));
-
+
rtx tmp9 = force_reg (V2DImode, gen_lowpart (V2DImode, tmp3));
rtx tmp10 = gen_reg_rtx (V2DImode);
emit_insn (gen_ashlv2di3 (tmp10, tmp9, GEN_INT (128 - bits)));
@@ -8414,7 +8582,7 @@ expand_small_cpymem_or_setmem (rtx destmem, rtx srcmem,
DONE_LABEL is a label after the whole copying sequence. The label is created
on demand if *DONE_LABEL is NULL.
MIN_SIZE is minimal size of block copied. This value gets adjusted for new
- bounds after the initial copies.
+ bounds after the initial copies.
DESTMEM/SRCMEM are memory expressions pointing to the copies block,
DESTPTR/SRCPTR are pointers to the block. DYNAMIC_CHECK indicate whether
@@ -8723,7 +8891,7 @@ expand_set_or_cpymem_constant_prologue (rtx dst, rtx *srcp, rtx destreg,
return dst;
}
-/* Return true if ALG can be used in current context.
+/* Return true if ALG can be used in current context.
Assume we expand memset if MEMSET is true. */
static bool
alg_usable_p (enum stringop_alg alg, bool memset, bool have_as)
@@ -9089,7 +9257,7 @@ ix86_copy_addr_to_reg (rtx addr)
with specified algorithm.
4) Epilogue: code copying tail of the block that is too small to be
- handled by main body (or up to size guarded by prologue guard).
+ handled by main body (or up to size guarded by prologue guard).
Misaligned move sequence
@@ -9307,7 +9475,7 @@ ix86_expand_set_or_cpymem (rtx dst, rtx src, rtx count_exp, rtx val_exp,
/* Do the cheap promotion to allow better CSE across the
main loop and epilogue (ie one load of the big constant in the
- front of all code.
+ front of all code.
For now the misaligned move sequences do not have fast path
without broadcasting. */
if (issetmem && ((CONST_INT_P (val_exp) || misaligned_prologue_used)))
@@ -11247,6 +11415,7 @@ ix86_expand_args_builtin (const struct builtin_description *d,
case V16QI_FTYPE_V8HI_V8HI:
case V16HF_FTYPE_V16HF_V16HF:
case V16SF_FTYPE_V16SF_V16SF:
+ case V16SI_FTYPE_V16SI_V16SI:
case V8QI_FTYPE_V8QI_V8QI:
case V8QI_FTYPE_V4HI_V4HI:
case V8HI_FTYPE_V8HI_V8HI:
@@ -12741,7 +12910,7 @@ ix86_expand_round_builtin (const struct builtin_description *d,
/* Skip erasing embedded rounding for below expanders who
generates multiple insns. In ix86_erase_embedded_rounding
the pattern will be transformed to a single set, and emit_insn
- appends the set insead of insert it to chain. So the insns
+ appends the set instead of insert it to chain. So the insns
emitted inside define_expander would be ignored. */
switch (icode)
{
@@ -12858,6 +13027,10 @@ ix86_expand_special_args_builtin (const struct builtin_description *d,
klass = load;
memory = 0;
break;
+ case CHAR_FTYPE_PCCHAR:
+ case SHORT_FTYPE_PCSHORT:
+ case INT_FTYPE_PCINT:
+ case INT64_FTYPE_PCINT64:
case UINT64_FTYPE_PUNSIGNED:
case V2DI_FTYPE_PV2DI:
case V4DI_FTYPE_PV4DI:
@@ -13570,13 +13743,13 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
case IX86_BUILTIN_LDMXCSR:
op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
- target = assign_386_stack_local (SImode, SLOT_TEMP);
+ target = assign_stack_temp (SImode, GET_MODE_SIZE (SImode));
emit_move_insn (target, op0);
emit_insn (gen_sse_ldmxcsr (target));
return 0;
case IX86_BUILTIN_STMXCSR:
- target = assign_386_stack_local (SImode, SLOT_TEMP);
+ target = assign_stack_temp (SImode, GET_MODE_SIZE (SImode));
emit_insn (gen_sse_stmxcsr (target));
return copy_to_mode_reg (SImode, target);
@@ -13625,7 +13798,7 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
if (!REG_P (op2))
op2 = copy_to_mode_reg (SImode, op2);
- emit_insn (fcode == IX86_BUILTIN_MONITOR
+ emit_insn (fcode == IX86_BUILTIN_MONITOR
? gen_sse3_monitor (Pmode, op0, op1, op2)
: gen_monitorx (Pmode, op0, op1, op2));
return 0;
@@ -15928,7 +16101,7 @@ static const ix86_vec_bcast_map_simode_t ix86_vec_bcast_map_simode[] = {
};
/* Comparator for bsearch on ix86_vec_bcast_map. */
-static int
+static int
ix86_vec_bcast_map_simode_cmp (const void *key, const void *entry)
{
return (*(const unsigned int*)key)
@@ -18096,6 +18269,8 @@ quarter:
else if (use_vec_merge)
{
do_vec_merge:
+ if (!nonimmediate_operand (val, inner_mode))
+ val = force_reg (inner_mode, val);
tmp = gen_rtx_VEC_DUPLICATE (mode, val);
tmp = gen_rtx_VEC_MERGE (mode, tmp, target,
GEN_INT (HOST_WIDE_INT_1U << elt));
@@ -25142,7 +25317,7 @@ ix86_expand_sse2_mulvxdi3 (rtx op0, rtx op1, rtx op2)
/* Multiply lower parts and add all */
t5 = gen_reg_rtx (V2DImode);
- emit_insn (gen_vec_widen_umult_even_v4si (t5,
+ emit_insn (gen_vec_widen_umult_even_v4si (t5,
gen_lowpart (V4SImode, op1),
gen_lowpart (V4SImode, op2)));
force_expand_binop (mode, add_optab, t5, t4, op0, 1, OPTAB_DIRECT);
@@ -25337,7 +25512,7 @@ ix86_expand_pextr (rtx *operands)
return false;
dst = SUBREG_REG (dst);
}
-
+
if (SUBREG_P (src))
{
pos += SUBREG_BYTE (src) * BITS_PER_UNIT;
@@ -25573,7 +25748,7 @@ ix86_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
rtx op0, rtx op1,
rtx *quot_p, rtx *rem_p)
{
- rtx rem = assign_386_stack_local (mode, SLOT_TEMP);
+ rtx rem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
rtx quot = emit_library_call_value (libfunc, NULL_RTX, LCT_NORMAL,
mode, op0, mode, op1, mode,
@@ -25952,7 +26127,7 @@ ix86_gen_bcst_mem (machine_mode mode, rtx x)
&& !CONST_DOUBLE_P (cst)
&& !CONST_FIXED_P (cst))
return NULL_RTX;
-
+
int n_elts = GET_MODE_NUNITS (mode);
if (CONST_VECTOR_NUNITS (x) != n_elts)
return NULL_RTX;
@@ -26120,7 +26295,7 @@ do_mem_operand:
/* Return TRUE if OP (in mode MODE) is the leaf of a ternary logic
expression, such as a register or a memory reference. */
-
+
bool
ix86_ternlog_leaf_p (rtx op, machine_mode mode)
{
@@ -26309,7 +26484,7 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
return target;
}
break;
-
+
case 0x22: /* ~b&c */
if ((!op0 || !side_effects_p (op0))
&& op1 && register_operand (op1, mode)
@@ -26382,7 +26557,7 @@ ix86_expand_ternlog (machine_mode mode, rtx op0, rtx op1, rtx op2, int idx,
return target;
}
break;
-
+
case 0x5a: /* a^c */
if (op0 && ix86_ternlog_leaf_p (op0, mode)
&& op2 && ix86_ternlog_leaf_p (op2, mode)
diff --git a/gcc/config/i386/i386-features.cc b/gcc/config/i386/i386-features.cc
index ca902ec..e2e8521 100644
--- a/gcc/config/i386/i386-features.cc
+++ b/gcc/config/i386/i386-features.cc
@@ -18,6 +18,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -350,7 +351,7 @@ scalar_chain::mark_dual_mode_def (df_ref def)
return;
n_sse_to_integer++;
}
-
+
if (dump_file)
fprintf (dump_file,
" Mark r%d def in insn %d as requiring both modes in chain #%d\n",
@@ -1503,6 +1504,23 @@ general_scalar_chain::convert_insn (rtx_insn *insn)
df_insn_rescan (insn);
}
+/* Helper function to compute gain for loading an immediate constant.
+ Typically, two movabsq for TImode vs. vmovdqa for V1TImode, but
+ with numerous special cases. */
+
+static int
+timode_immed_const_gain (rtx cst)
+{
+ /* movabsq vs. movabsq+vmovq+vunpacklqdq. */
+ if (CONST_WIDE_INT_P (cst)
+ && CONST_WIDE_INT_NUNITS (cst) == 2
+ && CONST_WIDE_INT_ELT (cst, 0) == CONST_WIDE_INT_ELT (cst, 1))
+ return optimize_insn_for_size_p () ? -COSTS_N_BYTES (9)
+ : -COSTS_N_INSNS (2);
+ /* 2x movabsq ~ vmovdqa. */
+ return 0;
+}
+
/* Compute a gain for chain conversion. */
int
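
The gain helper above prices TImode immediates such as the one below, where
both 64-bit halves are equal: the scalar form is a single movabsq while, per
the comment, the vector form needs a movabsq plus vmovq plus unpack, hence
the negative gain (user-level sketch, 64-bit target assumed):

    /* The kind of TImode immediate store whose conversion cost the
       helper estimates; illustrative only.  */
    __int128 g;

    void
    store_const (void)
    {
      g = ((__int128) 0x0123456789abcdefULL << 64) | 0x0123456789abcdefULL;
    }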
@@ -1549,7 +1567,14 @@ timode_scalar_chain::compute_convert_gain ()
case CONST_INT:
if (MEM_P (dst)
&& standard_sse_constant_p (src, V1TImode))
- igain = optimize_insn_for_size_p() ? COSTS_N_BYTES (11) : 1;
+ igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (11) : 1;
+ break;
+
+ case CONST_WIDE_INT:
+ /* 2 x mov vs. vmovdqa. */
+ if (MEM_P (dst))
+ igain = optimize_insn_for_size_p () ? COSTS_N_BYTES (3)
+ : COSTS_N_INSNS (1);
break;
case NOT:
@@ -1562,6 +1587,8 @@ timode_scalar_chain::compute_convert_gain ()
case IOR:
if (!MEM_P (dst))
igain = COSTS_N_INSNS (1);
+ if (CONST_SCALAR_INT_P (XEXP (src, 1)))
+ igain += timode_immed_const_gain (XEXP (src, 1));
break;
case ASHIFT:
@@ -2304,14 +2331,16 @@ timode_scalar_to_vector_candidate_p (rtx_insn *insn)
|| CONST_SCALAR_INT_P (XEXP (src, 1))
|| timode_mem_p (XEXP (src, 1))))
return true;
- return REG_P (XEXP (src, 0))
+ return (REG_P (XEXP (src, 0))
+ || timode_mem_p (XEXP (src, 0)))
&& (REG_P (XEXP (src, 1))
|| CONST_SCALAR_INT_P (XEXP (src, 1))
|| timode_mem_p (XEXP (src, 1)));
case IOR:
case XOR:
- return REG_P (XEXP (src, 0))
+ return (REG_P (XEXP (src, 0))
+ || timode_mem_p (XEXP (src, 0)))
&& (REG_P (XEXP (src, 1))
|| CONST_SCALAR_INT_P (XEXP (src, 1))
|| timode_mem_p (XEXP (src, 1)));
@@ -3629,7 +3658,7 @@ ix86_compare_version_priority (tree decl1, tree decl2)
/* V1 and V2 point to function versions with different priorities
based on the target ISA. This function compares their priorities. */
-
+
static int
feature_compare (const void *v1, const void *v2)
{
@@ -3678,7 +3707,7 @@ add_condition_to_bb (tree function_decl, tree version_decl,
convert_expr = build1 (CONVERT_EXPR, ptr_type_node,
build_fold_addr_expr (version_decl));
result_var = create_tmp_var (ptr_type_node);
- convert_stmt = gimple_build_assign (result_var, convert_expr);
+ convert_stmt = gimple_build_assign (result_var, convert_expr);
return_stmt = gimple_build_return (result_var);
if (predicate_chain == NULL_TREE)
@@ -3705,7 +3734,7 @@ add_condition_to_bb (tree function_decl, tree version_decl,
gimple_seq_add_stmt (&gseq, call_cond_stmt);
predicate_chain = TREE_CHAIN (predicate_chain);
-
+
if (and_expr_var == NULL)
and_expr_var = cond_var;
else
@@ -3746,7 +3775,7 @@ add_condition_to_bb (tree function_decl, tree version_decl,
gimple_set_bb (return_stmt, bb2);
bb3 = e23->dest;
- make_edge (bb1, bb3, EDGE_FALSE_VALUE);
+ make_edge (bb1, bb3, EDGE_FALSE_VALUE);
remove_edge (e23);
make_edge (bb2, EXIT_BLOCK_PTR_FOR_FN (cfun), 0);
@@ -3907,7 +3936,7 @@ ix86_mangle_function_version_assembler_name (tree decl, tree id)
return ret;
}
-tree
+tree
ix86_mangle_decl_assembler_name (tree decl, tree id)
{
/* For function version, add the target suffix to the assembler name. */
@@ -3937,7 +3966,7 @@ ix86_get_function_versions_dispatcher (void *decl)
tree dispatch_decl = NULL;
struct cgraph_function_version_info *default_version_info = NULL;
-
+
gcc_assert (fn != NULL && DECL_FUNCTION_VERSIONED (fn));
node = cgraph_node::get (fn);
@@ -3945,7 +3974,7 @@ ix86_get_function_versions_dispatcher (void *decl)
node_v = node->function_version ();
gcc_assert (node_v != NULL);
-
+
if (node_v->dispatcher_resolver != NULL)
return node_v->dispatcher_resolver;
@@ -4101,7 +4130,7 @@ make_resolver_func (const tree default_decl,
provide the code to dispatch the right function at run-time. NODE points
to the dispatcher decl whose body will be created. */
-tree
+tree
ix86_generate_version_dispatcher_body (void *node_p)
{
tree resolver_decl;
diff --git a/gcc/config/i386/i386-isa.def b/gcc/config/i386/i386-isa.def
index bfb33ba..6cff3a2 100644
--- a/gcc/config/i386/i386-isa.def
+++ b/gcc/config/i386/i386-isa.def
@@ -123,3 +123,9 @@ DEF_PTA(AVX10_1_256)
DEF_PTA(AVX10_1_512)
DEF_PTA(AVX10_2_256)
DEF_PTA(AVX10_2_512)
+DEF_PTA(AMX_AVX512)
+DEF_PTA(AMX_TF32)
+DEF_PTA(AMX_TRANSPOSE)
+DEF_PTA(AMX_FP8)
+DEF_PTA(MOVRS)
+DEF_PTA(AMX_MOVRS)
diff --git a/gcc/config/i386/i386-options.cc b/gcc/config/i386/i386-options.cc
index f79257c..239269e 100644
--- a/gcc/config/i386/i386-options.cc
+++ b/gcc/config/i386/i386-options.cc
@@ -18,6 +18,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -262,7 +263,13 @@ static struct ix86_target_opts isa2_opts[] =
{ "-mavx10.1-256", OPTION_MASK_ISA2_AVX10_1_256 },
{ "-mavx10.1-512", OPTION_MASK_ISA2_AVX10_1_512 },
{ "-mavx10.2-256", OPTION_MASK_ISA2_AVX10_2_256 },
- { "-mavx10.2-512", OPTION_MASK_ISA2_AVX10_2_512 }
+ { "-mavx10.2-512", OPTION_MASK_ISA2_AVX10_2_512 },
+ { "-mamx-avx512", OPTION_MASK_ISA2_AMX_AVX512 },
+ { "-mamx-tf32", OPTION_MASK_ISA2_AMX_TF32 },
+ { "-mamx-transpose", OPTION_MASK_ISA2_AMX_TRANSPOSE },
+ { "-mamx-fp8", OPTION_MASK_ISA2_AMX_FP8 },
+ { "-mmovrs", OPTION_MASK_ISA2_MOVRS },
+ { "-mamx-movrs", OPTION_MASK_ISA2_AMX_MOVRS }
};
static struct ix86_target_opts isa_opts[] =
{
@@ -754,7 +761,7 @@ static unsigned HOST_WIDE_INT initial_ix86_arch_features[X86_ARCH_LAST] = {
~m_386,
};
-/* This table must be in sync with enum processor_type in i386.h. */
+/* This table must be in sync with enum processor_type in i386.h. */
static const struct processor_costs *processor_cost_table[] =
{
&generic_cost,
@@ -1131,6 +1138,12 @@ ix86_valid_target_attribute_inner_p (tree fndecl, tree args, char *p_strings[],
IX86_ATTR_ISA ("avx10.2", OPT_mavx10_2_256),
IX86_ATTR_ISA ("avx10.2-256", OPT_mavx10_2_256),
IX86_ATTR_ISA ("avx10.2-512", OPT_mavx10_2_512),
+ IX86_ATTR_ISA ("amx-avx512", OPT_mamx_avx512),
+ IX86_ATTR_ISA ("amx-tf32", OPT_mamx_tf32),
+ IX86_ATTR_ISA ("amx-transpose", OPT_mamx_transpose),
+ IX86_ATTR_ISA ("amx-fp8", OPT_mamx_fp8),
+ IX86_ATTR_ISA ("movrs", OPT_mmovrs),
+ IX86_ATTR_ISA ("amx-movrs", OPT_mamx_movrs),
/* enum options */
IX86_ATTR_ENUM ("fpmath=", OPT_mfpmath_),
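
The attribute strings registered above become usable in
__attribute__((target(...))) in the same way as the existing ISA names; a
hedged usage sketch:

    /* Function-level ISA selection with the newly accepted names.  */
    __attribute__ ((target ("amx-avx512")))
    void
    amx_kernel (void)
    {
      /* AMX-AVX512 code would go here.  */
    }

    __attribute__ ((target ("movrs")))
    void
    movrs_kernel (void)
    {
      /* MOVRS code would go here.  */
    }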
@@ -1545,9 +1558,9 @@ ix86_valid_target_attribute_p (tree fndecl,
tree old_optimize = build_optimization_node (&global_options,
&global_options_set);
- /* Get the optimization options of the current function. */
+ /* Get the optimization options of the current function. */
tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
-
+
if (!func_optimize)
func_optimize = old_optimize;
@@ -2836,7 +2849,7 @@ ix86_option_override_internal (bool main_args_p,
/* For all chips supporting SSE2, -mfpmath=sse performs better than
fpmath=387. The second is however default at many targets since the
extra 80bit precision of temporaries is considered to be part of ABI.
- Overwrite the default at least for -ffast-math.
+ Overwrite the default at least for -ffast-math.
TODO: -mfpmath=both seems to produce same performing code with bit
smaller binaries. It is however not clear if register allocation is
ready for this setting.
@@ -3680,8 +3693,8 @@ char *
ix86_offload_options (void)
{
if (TARGET_LP64)
- return xstrdup ("-foffload-abi=lp64");
- return xstrdup ("-foffload-abi=ilp32");
+ return xstrdup ("-foffload-abi=lp64 -foffload-abi-host-opts=-m64");
+ return xstrdup ("-foffload-abi=ilp32 -foffload-abi-host-opts=-m32");
}
/* Handle "cdecl", "stdcall", "fastcall", "regparm", "thiscall",
diff --git a/gcc/config/i386/i386-opts.h b/gcc/config/i386/i386-opts.h
index c7ec0d9..35542b2 100644
--- a/gcc/config/i386/i386-opts.h
+++ b/gcc/config/i386/i386-opts.h
@@ -29,7 +29,7 @@ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
enum stringop_alg
{
#undef DEF_ALG
-#define DEF_ALG(alg, name) alg,
+#define DEF_ALG(alg, name) alg,
#include "stringop.def"
last_alg
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index 3a7bc94..c1f9147 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -164,7 +164,8 @@ extern bool ix86_expand_fp_vec_cmp (rtx[]);
extern void ix86_expand_sse_movcc (rtx, rtx, rtx, rtx);
extern void ix86_expand_sse_extend (rtx, rtx, bool);
extern void ix86_expand_sse_unpack (rtx, rtx, bool, bool);
-extern void ix86_expand_fp_spaceship (rtx, rtx, rtx);
+extern void ix86_expand_fp_spaceship (rtx, rtx, rtx, rtx);
+extern void ix86_expand_int_spaceship (rtx, rtx, rtx, rtx);
extern bool ix86_expand_int_addcc (rtx[]);
extern void ix86_expand_carry (rtx arg);
extern rtx_insn *ix86_expand_call (rtx, rtx, rtx, rtx, rtx, bool);
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 224a78c..473e4cb 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -17,6 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define INCLUDE_MEMORY
#define INCLUDE_STRING
#define IN_TARGET_CODE 1
@@ -180,7 +181,7 @@ enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
/* The "default" register map used in 32bit mode. */
-int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
+unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
/* general regs */
0, 2, 1, 3, 6, 7, 4, 5,
@@ -211,7 +212,7 @@ int const debugger_register_map[FIRST_PSEUDO_REGISTER] =
/* The "default" register map used in 64bit mode. */
-int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
+unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
{
/* general regs */
0, 1, 2, 3, 4, 5, 6, 7,
@@ -293,7 +294,7 @@ int const debugger64_register_map[FIRST_PSEUDO_REGISTER] =
17 for %st(6) (gcc regno = 14)
18 for %st(7) (gcc regno = 15)
*/
-int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
+unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER] =
{
/* general regs */
0, 2, 1, 3, 6, 7, 5, 4,
@@ -511,7 +512,7 @@ ix86_conditional_register_usage (void)
/* See the definition of CALL_USED_REGISTERS in i386.h. */
c_mask = CALL_USED_REGISTERS_MASK (TARGET_64BIT_MS_ABI);
-
+
CLEAR_HARD_REG_SET (reg_class_contents[(int)CLOBBERED_REGS]);
for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
@@ -1939,7 +1940,7 @@ init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
The midde-end can't deal with the vector types > 16 bytes. In this
case, we return the original mode and warn ABI change if CUM isn't
- NULL.
+ NULL.
If INT_RETURN is true, warn ABI change if the vector mode isn't
available for function return value. */
@@ -4269,7 +4270,7 @@ ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
if (fntype_or_decl && DECL_P (fntype_or_decl))
fn = fntype_or_decl;
fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
-
+
if (ix86_function_type_abi (fntype) == MS_ABI)
{
if (TARGET_64BIT)
@@ -4387,7 +4388,7 @@ ix86_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
/* Unless ABI prescibes otherwise,
MMX/3dNow values are returned in MM0 if available. */
-
+
if (size == 8)
return TARGET_VECT8_RETURNS || !TARGET_MMX;
@@ -4506,7 +4507,7 @@ ix86_build_builtin_va_list (void)
/* For SYSV_ABI we use an array of one record. */
sysv_va_list_type_node = ix86_build_builtin_va_list_64 ();
-
+
/* For MS_ABI we use plain pointer to argument area. */
tree char_ptr_type = build_pointer_type (char_type_node);
tree attr = tree_cons (get_identifier ("ms_abi va_list"), NULL_TREE,
@@ -4907,13 +4908,31 @@ ix86_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
- need_temp = (!REG_P (container)
+ bool container_in_reg = false;
+ if (REG_P (container))
+ container_in_reg = true;
+ else if (GET_CODE (container) == PARALLEL
+ && GET_MODE (container) == BLKmode
+ && XVECLEN (container, 0) == 1)
+ {
+ /* Check if it is a PARALLEL BLKmode container of an EXPR_LIST
+ expression in a TImode register. In this case, temp isn't
+ needed. Otherwise, the TImode variable will be put in the
+ GPR save area which guarantees only 8-byte alignment. */
+ rtx x = XVECEXP (container, 0, 0);
+ if (GET_CODE (x) == EXPR_LIST
+ && REG_P (XEXP (x, 0))
+ && XEXP (x, 1) == const0_rtx)
+ container_in_reg = true;
+ }
+
+ need_temp = (!container_in_reg
&& ((needed_intregs && TYPE_ALIGN (type) > 64)
|| TYPE_ALIGN (type) > 128));
/* In case we are passing structure, verify that it is consecutive block
on the register save area. If not we need to do moves. */
- if (!need_temp && !REG_P (container))
+ if (!need_temp && !container_in_reg)
{
/* Verify that all registers are strictly consecutive */
if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
@@ -5445,7 +5464,7 @@ standard_sse_constant_opcode (rtx_insn *insn, rtx *operands)
&& float_vector_all_ones_operand (x, mode)))
{
enum attr_mode insn_mode = get_attr_mode (insn);
-
+
switch (insn_mode)
{
case MODE_XI:
@@ -5965,7 +5984,7 @@ ix86_frame_pointer_required (void)
/* SSE saves require frame-pointer when stack is misaligned. */
if (TARGET_64BIT_MS_ABI && ix86_incoming_stack_boundary < 128)
return true;
-
+
/* In ix86_option_override_internal, TARGET_OMIT_LEAF_FRAME_POINTER
turns off the frame pointer by default. Turn it back on now if
we've not got a leaf function. */
@@ -6561,8 +6580,8 @@ gen_push2 (rtx mem, rtx reg1, rtx reg2, bool ppx_p = false)
if (REG_P (reg2) && GET_MODE (reg2) != word_mode)
reg2 = gen_rtx_REG (word_mode, REGNO (reg2));
- return ppx_p ? gen_push2p_di (mem, reg1, reg2):
- gen_push2_di (mem, reg1, reg2);
+ return ppx_p ? gen_push2p_di (mem, reg1, reg2)
+ : gen_push2_di (mem, reg1, reg2);
}
/* Return >= 0 if there is an unused call-clobbered register available
@@ -10772,7 +10791,7 @@ ix86_decompose_address (rtx addr, struct ix86_address *out)
addr = XEXP (addr, 0);
if (CONST_INT_P (addr))
return false;
- }
+ }
else if (GET_CODE (addr) == AND
&& const_32bit_mask (XEXP (addr, 1), DImode))
{
@@ -12450,7 +12469,7 @@ ix86_tls_address_pattern_p (rtx op)
}
/* Rewrite *LOC so that it refers to a default TLS address space. */
-void
+static void
ix86_rewrite_tls_address_1 (rtx *loc)
{
subrtx_ptr_iterator::array_type array;
@@ -12472,6 +12491,13 @@ ix86_rewrite_tls_address_1 (rtx *loc)
if (GET_CODE (u) == UNSPEC
&& XINT (u, 1) == UNSPEC_TP)
{
+ /* NB: Since address override only applies to the
+ (reg32) part in fs:(reg32), return if address
+ override is used. */
+ if (Pmode != word_mode
+ && REG_P (XEXP (*x, 1 - i)))
+ return;
+
addr_space_t as = DEFAULT_TLS_SEG_REG;
*x = XEXP (*x, 1 - i);
@@ -13621,7 +13647,7 @@ ix86_print_operand (FILE *file, rtx x, int code)
case 2:
putc ('w', file);
break;
-
+
case 4:
putc ('l', file);
break;
@@ -14881,9 +14907,19 @@ ix86_dirflag_mode_needed (rtx_insn *insn)
static bool
ix86_check_avx_upper_register (const_rtx exp)
{
- return (SSE_REG_P (exp)
- && !EXT_REX_SSE_REG_P (exp)
- && GET_MODE_BITSIZE (GET_MODE (exp)) > 128);
+ /* construct_container may return a parallel with expr_list
+ which contains the real reg and mode */
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, exp, NONCONST)
+ {
+ const_rtx x = *iter;
+ if (SSE_REG_P (x)
+ && !EXT_REX_SSE_REG_P (x)
+ && GET_MODE_BITSIZE (GET_MODE (x)) > 128)
+ return true;
+ }
+
+ return false;
}
/* Check if a 256bit or 512bit AVX register is referenced in stores. */
@@ -14891,7 +14927,9 @@ ix86_check_avx_upper_register (const_rtx exp)
static void
ix86_check_avx_upper_stores (rtx dest, const_rtx, void *data)
{
- if (ix86_check_avx_upper_register (dest))
+ if (SSE_REG_P (dest)
+ && !EXT_REX_SSE_REG_P (dest)
+ && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
{
bool *used = (bool *) data;
*used = true;
@@ -14950,14 +14988,14 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
return AVX_U128_CLEAN;
}
- subrtx_iterator::array_type array;
-
rtx set = single_set (insn);
if (set)
{
rtx dest = SET_DEST (set);
rtx src = SET_SRC (set);
- if (ix86_check_avx_upper_register (dest))
+ if (SSE_REG_P (dest)
+ && !EXT_REX_SSE_REG_P (dest)
+ && GET_MODE_BITSIZE (GET_MODE (dest)) > 128)
{
/* This is an YMM/ZMM load. Return AVX_U128_DIRTY if the
source isn't zero. */
@@ -14968,9 +15006,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
}
else
{
- FOR_EACH_SUBRTX (iter, array, src, NONCONST)
- if (ix86_check_avx_upper_register (*iter))
- return AVX_U128_DIRTY;
+ if (ix86_check_avx_upper_register (src))
+ return AVX_U128_DIRTY;
}
/* This isn't YMM/ZMM load/store. */
@@ -14981,9 +15018,8 @@ ix86_avx_u128_mode_needed (rtx_insn *insn)
Hardware changes state only when a 256bit register is written to,
but we need to prevent the compiler from moving optimal insertion
point above eventual read from 256bit or 512 bit register. */
- FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
- if (ix86_check_avx_upper_register (*iter))
- return AVX_U128_DIRTY;
+ if (ix86_check_avx_upper_register (PATTERN (insn)))
+ return AVX_U128_DIRTY;
return AVX_U128_ANY;
}
@@ -15972,7 +16008,7 @@ ix86_avoid_lea_for_addr (rtx_insn *insn, rtx operands[])
if (optimize_size > 1
&& parts.scale > 1
&& !parts.base
- && (!parts.disp || parts.disp == const0_rtx))
+ && (!parts.disp || parts.disp == const0_rtx))
return true;
/* Check we need to optimize. */
@@ -16176,6 +16212,8 @@ ix86_build_const_vector (machine_mode mode, bool vect, rtx value)
case E_V32BFmode:
case E_V16BFmode:
case E_V8BFmode:
+ case E_V4BFmode:
+ case E_V2BFmode:
n_elt = GET_MODE_NUNITS (mode);
v = rtvec_alloc (n_elt);
scalar_mode = GET_MODE_INNER (mode);
@@ -16215,6 +16253,8 @@ ix86_build_signbit_mask (machine_mode mode, bool vect, bool invert)
case E_V32BFmode:
case E_V16BFmode:
case E_V8BFmode:
+ case E_V4BFmode:
+ case E_V2BFmode:
vec_mode = mode;
imode = HImode;
break;
@@ -16623,6 +16663,11 @@ ix86_fp_compare_code_to_integer (enum rtx_code code)
return LEU;
case LTGT:
return NE;
+ case EQ:
+ case NE:
+ if (TARGET_AVX10_2_256)
+ return code;
+ /* FALLTHRU. */
default:
return UNKNOWN;
}
@@ -17124,7 +17169,7 @@ ix86_output_call_insn (rtx_insn *insn, rtx call_op)
seh_nop_p = true;
break;
}
-
+
/* If we get to another real insn, we don't need the nop. */
if (INSN_P (i))
break;
@@ -17714,7 +17759,7 @@ ix86_dependencies_evaluation_hook (rtx_insn *head, rtx_insn *tail)
using topological ordering in the region. */
if (rgn == CONTAINING_RGN (e->src->index)
&& BLOCK_TO_BB (bb->index) > BLOCK_TO_BB (e->src->index))
- add_dependee_for_func_arg (first_arg, e->src);
+ add_dependee_for_func_arg (first_arg, e->src);
}
}
insn = first_arg;
@@ -18086,7 +18131,7 @@ ix86_local_alignment (tree exp, machine_mode mode,
other unit cannot rely on the alignment.
Exclude va_list type. It is the common case of local array where
- we cannot benefit from the alignment.
+ we cannot benefit from the alignment.
TODO: Probably one should optimize for size only when var is not escaping. */
if (TARGET_64BIT && optimize_function_for_speed_p (cfun)
@@ -18469,6 +18514,8 @@ ix86_fold_builtin (tree fndecl, int n_args,
= (enum ix86_builtins) DECL_MD_FUNCTION_CODE (fndecl);
enum rtx_code rcode;
bool is_vshift;
+ enum tree_code tcode;
+ bool is_scalar;
unsigned HOST_WIDE_INT mask;
switch (fn_code)
@@ -18918,6 +18965,131 @@ ix86_fold_builtin (tree fndecl, int n_args,
}
break;
+ case IX86_BUILTIN_MINSS:
+ case IX86_BUILTIN_MINSH_MASK:
+ tcode = LT_EXPR;
+ is_scalar = true;
+ goto do_minmax;
+
+ case IX86_BUILTIN_MAXSS:
+ case IX86_BUILTIN_MAXSH_MASK:
+ tcode = GT_EXPR;
+ is_scalar = true;
+ goto do_minmax;
+
+ case IX86_BUILTIN_MINPS:
+ case IX86_BUILTIN_MINPD:
+ case IX86_BUILTIN_MINPS256:
+ case IX86_BUILTIN_MINPD256:
+ case IX86_BUILTIN_MINPS512:
+ case IX86_BUILTIN_MINPD512:
+ case IX86_BUILTIN_MINPS128_MASK:
+ case IX86_BUILTIN_MINPD128_MASK:
+ case IX86_BUILTIN_MINPS256_MASK:
+ case IX86_BUILTIN_MINPD256_MASK:
+ case IX86_BUILTIN_MINPH128_MASK:
+ case IX86_BUILTIN_MINPH256_MASK:
+ case IX86_BUILTIN_MINPH512_MASK:
+ tcode = LT_EXPR;
+ is_scalar = false;
+ goto do_minmax;
+
+ case IX86_BUILTIN_MAXPS:
+ case IX86_BUILTIN_MAXPD:
+ case IX86_BUILTIN_MAXPS256:
+ case IX86_BUILTIN_MAXPD256:
+ case IX86_BUILTIN_MAXPS512:
+ case IX86_BUILTIN_MAXPD512:
+ case IX86_BUILTIN_MAXPS128_MASK:
+ case IX86_BUILTIN_MAXPD128_MASK:
+ case IX86_BUILTIN_MAXPS256_MASK:
+ case IX86_BUILTIN_MAXPD256_MASK:
+ case IX86_BUILTIN_MAXPH128_MASK:
+ case IX86_BUILTIN_MAXPH256_MASK:
+ case IX86_BUILTIN_MAXPH512_MASK:
+ tcode = GT_EXPR;
+ is_scalar = false;
+ do_minmax:
+ gcc_assert (n_args >= 2);
+ if (TREE_CODE (args[0]) != VECTOR_CST
+ || TREE_CODE (args[1]) != VECTOR_CST)
+ break;
+ mask = HOST_WIDE_INT_M1U;
+ if (n_args > 2)
+ {
+ gcc_assert (n_args >= 4);
+ /* This is masked minmax. */
+ if (TREE_CODE (args[3]) != INTEGER_CST
+ || TREE_SIDE_EFFECTS (args[2]))
+ break;
+ mask = TREE_INT_CST_LOW (args[3]);
+ unsigned elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
+ mask |= HOST_WIDE_INT_M1U << elems;
+ if (mask != HOST_WIDE_INT_M1U
+ && TREE_CODE (args[2]) != VECTOR_CST)
+ break;
+ if (n_args >= 5)
+ {
+ if (!tree_fits_uhwi_p (args[4]))
+ break;
+ if (tree_to_uhwi (args[4]) != 4
+ && tree_to_uhwi (args[4]) != 8)
+ break;
+ }
+ if (mask == (HOST_WIDE_INT_M1U << elems))
+ return args[2];
+ }
+ /* Punt on NaNs, unless exceptions are disabled. */
+ if (HONOR_NANS (args[0])
+ && (n_args < 5 || tree_to_uhwi (args[4]) != 8))
+ for (int i = 0; i < 2; ++i)
+ {
+ unsigned count = vector_cst_encoded_nelts (args[i]);
+ for (unsigned j = 0; j < count; ++j)
+ if (tree_expr_nan_p (VECTOR_CST_ENCODED_ELT (args[i], j)))
+ return NULL_TREE;
+ }
+ {
+ tree res = const_binop (tcode,
+ truth_type_for (TREE_TYPE (args[0])),
+ args[0], args[1]);
+ if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
+ break;
+ res = fold_ternary (VEC_COND_EXPR, TREE_TYPE (args[0]), res,
+ args[0], args[1]);
+ if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
+ break;
+ if (mask != HOST_WIDE_INT_M1U)
+ {
+ unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
+ vec_perm_builder sel (nelts, nelts, 1);
+ for (unsigned int i = 0; i < nelts; i++)
+ if (mask & (HOST_WIDE_INT_1U << i))
+ sel.quick_push (i);
+ else
+ sel.quick_push (nelts + i);
+ vec_perm_indices indices (sel, 2, nelts);
+ res = fold_vec_perm (TREE_TYPE (args[0]), res, args[2],
+ indices);
+ if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
+ break;
+ }
+ if (is_scalar)
+ {
+ unsigned nelts = TYPE_VECTOR_SUBPARTS (TREE_TYPE (args[0]));
+ vec_perm_builder sel (nelts, nelts, 1);
+ sel.quick_push (0);
+ for (unsigned int i = 1; i < nelts; i++)
+ sel.quick_push (nelts + i);
+ vec_perm_indices indices (sel, 2, nelts);
+ res = fold_vec_perm (TREE_TYPE (args[0]), res, args[0],
+ indices);
+ if (res == NULL_TREE || TREE_CODE (res) != VECTOR_CST)
+ break;
+ }
+ return res;
+ }
+
default:
break;
}
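
The new folding handles calls whose operands are compile-time constant
vectors (and, for the masked forms, whose mask is constant); a user-level
example of the kind of call that can now be evaluated at compile time,
assuming the usual intrinsic wrappers lower to the builtins listed above:

    #include <immintrin.h>

    __m128
    const_min (void)
    {
      __m128 a = _mm_set_ps (4.0f, 3.0f, 2.0f, 1.0f);
      __m128 b = _mm_set_ps (1.0f, 2.0f, 3.0f, 4.0f);
      return _mm_min_ps (a, b);   /* foldable to a constant vector  */
    }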
@@ -19463,6 +19635,74 @@ ix86_gimple_fold_builtin (gimple_stmt_iterator *gsi)
}
return true;
+ case IX86_BUILTIN_MINPS:
+ case IX86_BUILTIN_MINPD:
+ case IX86_BUILTIN_MINPS256:
+ case IX86_BUILTIN_MINPD256:
+ case IX86_BUILTIN_MINPS512:
+ case IX86_BUILTIN_MINPD512:
+ case IX86_BUILTIN_MINPS128_MASK:
+ case IX86_BUILTIN_MINPD128_MASK:
+ case IX86_BUILTIN_MINPS256_MASK:
+ case IX86_BUILTIN_MINPD256_MASK:
+ case IX86_BUILTIN_MINPH128_MASK:
+ case IX86_BUILTIN_MINPH256_MASK:
+ case IX86_BUILTIN_MINPH512_MASK:
+ tcode = LT_EXPR;
+ goto do_minmax;
+
+ case IX86_BUILTIN_MAXPS:
+ case IX86_BUILTIN_MAXPD:
+ case IX86_BUILTIN_MAXPS256:
+ case IX86_BUILTIN_MAXPD256:
+ case IX86_BUILTIN_MAXPS512:
+ case IX86_BUILTIN_MAXPD512:
+ case IX86_BUILTIN_MAXPS128_MASK:
+ case IX86_BUILTIN_MAXPD128_MASK:
+ case IX86_BUILTIN_MAXPS256_MASK:
+ case IX86_BUILTIN_MAXPD256_MASK:
+ case IX86_BUILTIN_MAXPH128_MASK:
+ case IX86_BUILTIN_MAXPH256_MASK:
+ case IX86_BUILTIN_MAXPH512_MASK:
+ tcode = GT_EXPR;
+ do_minmax:
+ gcc_assert (n_args >= 2);
+ /* Without SSE4.1 we often aren't able to pattern match it back to the
+ desired instruction. */
+ if (!gimple_call_lhs (stmt) || !optimize || !TARGET_SSE4_1)
+ break;
+ arg0 = gimple_call_arg (stmt, 0);
+ arg1 = gimple_call_arg (stmt, 1);
+ elems = TYPE_VECTOR_SUBPARTS (TREE_TYPE (arg0));
+ /* For masked minmax, only optimize if the mask is all ones. */
+ if (n_args > 2
+ && !ix86_masked_all_ones (elems, gimple_call_arg (stmt, 3)))
+ break;
+ if (n_args >= 5)
+ {
+ tree arg4 = gimple_call_arg (stmt, 4);
+ if (!tree_fits_uhwi_p (arg4))
+ break;
+ if (tree_to_uhwi (arg4) == 4)
+ /* Ok. */;
+ else if (tree_to_uhwi (arg4) != 8)
+ /* Invalid round argument. */
+ break;
+ else if (HONOR_NANS (arg0))
+ /* Lowering to comparison would raise exceptions which
+ shouldn't be raised. */
+ break;
+ }
+ {
+ tree type = truth_type_for (TREE_TYPE (arg0));
+ tree cmpres = gimple_build (&stmts, tcode, type, arg0, arg1);
+ gsi_insert_seq_before (gsi, stmts, GSI_SAME_STMT);
+ g = gimple_build_assign (gimple_call_lhs (stmt),
+ VEC_COND_EXPR, cmpres, arg0, arg1);
+ gsi_replace (gsi, g, false);
+ }
+ return true;
+
default:
break;
}
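
At the GIMPLE level the min/max builtins are rewritten to a comparison
feeding a VEC_COND_EXPR, i.e. per element the result is a < b ? a : b (or >
for max). The scalar analogue below also shows why the NaN behaviour still
matches the hardware convention:

    /* Scalar analogue of the rewrite: the comparison is false for NaN
       operands, so B (the second source) is returned, matching the
       minss/minps convention.  */
    float
    minss_like (float a, float b)
    {
      return a < b ? a : b;
    }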
@@ -22041,7 +22281,7 @@ ix86_rtx_costs (rtx x, machine_mode mode, int outer_code_i, int opno,
*total = COSTS_N_BYTES (1);
else if (TARGET_SLOW_STC)
*total = COSTS_N_INSNS (2);
- else
+ else
*total = COSTS_N_INSNS (1);
return true;
}
@@ -24195,6 +24435,13 @@ ix86_stack_protect_guard (void)
return default_stack_protect_guard ();
}
+static bool
+ix86_stack_protect_runtime_enabled_p (void)
+{
+ /* Naked functions should not enable stack protector. */
+ return !ix86_function_naked (current_function_decl);
+}
+
/* For 32-bit code we can save PIC register setup by using
__stack_chk_fail_local hidden function instead of calling
__stack_chk_fail directly. 64-bit code doesn't need to setup any PIC
@@ -24382,7 +24629,7 @@ ix86_canonical_va_list_type (tree type)
return ms_va_list_type_node;
if ((TREE_CODE (type) == ARRAY_TYPE
- && integer_zerop (array_type_nelts (type)))
+ && integer_zerop (array_type_nelts_minus_one (type)))
|| POINTER_TYPE_P (type))
{
tree elem_type = TREE_TYPE (type);
@@ -24467,13 +24714,17 @@ ix86_reassociation_width (unsigned int op, machine_mode mode)
if (width == 1)
return 1;
- /* Integer vector instructions execute in FP unit
+ /* Znver1-4 Integer vector instructions execute in FP unit
and can execute 3 additions and one multiplication per cycle. */
if ((ix86_tune == PROCESSOR_ZNVER1 || ix86_tune == PROCESSOR_ZNVER2
- || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4
- || ix86_tune == PROCESSOR_ZNVER5)
+ || ix86_tune == PROCESSOR_ZNVER3 || ix86_tune == PROCESSOR_ZNVER4)
&& INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
return 1;
+ /* Znver5 can do 2 integer multiplications per cycle with latency
+ of 3. */
+ if (ix86_tune == PROCESSOR_ZNVER5
+ && INTEGRAL_MODE_P (mode) && op != PLUS && op != MINUS)
+ width = 6;
/* Account for targets that splits wide vectors into multiple parts. */
if (TARGET_AVX512_SPLIT_REGS && GET_MODE_BITSIZE (mode) > 256)
@@ -24554,6 +24805,14 @@ ix86_preferred_simd_mode (scalar_mode mode)
}
return word_mode;
+ case E_BFmode:
+ if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
+ return V32BFmode;
+ else if (TARGET_AVX && !TARGET_PREFER_AVX128)
+ return V16BFmode;
+ else
+ return V8BFmode;
+
case E_SFmode:
if (TARGET_AVX512F && TARGET_EVEX512 && !TARGET_PREFER_AVX256)
return V16SFmode;
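
With the new E_BFmode entry, the vectorizer can be offered BF vector modes
directly; a loop of the kind that may now be considered (hedged: actual
vectorization still depends on the enabled ISA, e.g. AVX512BF16/AVX10, on a
GCC recent enough to support __bf16, and on the usual cost checks):

    void
    bf16_add (__bf16 *restrict a, __bf16 *restrict b,
              __bf16 *restrict c, int n)
    {
      for (int i = 0; i < n; i++)
        c[i] = a[i] + b[i];
    }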
@@ -24632,7 +24891,8 @@ ix86_get_mask_mode (machine_mode data_mode)
/* AVX512FP16 only supports vector comparison
to kmask for _Float16. */
|| (TARGET_AVX512VL && TARGET_AVX512FP16
- && GET_MODE_INNER (data_mode) == E_HFmode))
+ && GET_MODE_INNER (data_mode) == E_HFmode)
+ || (TARGET_AVX10_2_256 && GET_MODE_INNER (data_mode) == E_BFmode))
{
if (elem_size == 4
|| elem_size == 8
@@ -24765,12 +25025,15 @@ private:
where we know it's not loaded from memory. */
unsigned m_num_gpr_needed[3];
unsigned m_num_sse_needed[3];
+ /* Number of 256-bit vector permutation. */
+ unsigned m_num_avx256_vec_perm[3];
};
ix86_vector_costs::ix86_vector_costs (vec_info* vinfo, bool costing_for_scalar)
: vector_costs (vinfo, costing_for_scalar),
m_num_gpr_needed (),
- m_num_sse_needed ()
+ m_num_sse_needed (),
+ m_num_avx256_vec_perm ()
{
}
@@ -24938,13 +25201,21 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
(AGU and load ports). Try to account for this by scaling the
construction cost by the number of elements involved. */
if ((kind == vec_construct || kind == vec_to_scalar)
- && stmt_info
- && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
- || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
- && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
- && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
- != INTEGER_CST))
- || STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_GATHER_SCATTER))
+ && ((stmt_info
+ && (STMT_VINFO_TYPE (stmt_info) == load_vec_info_type
+ || STMT_VINFO_TYPE (stmt_info) == store_vec_info_type)
+ && ((STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info) == VMAT_ELEMENTWISE
+ && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF (stmt_info)))
+ != INTEGER_CST))
+ || (STMT_VINFO_MEMORY_ACCESS_TYPE (stmt_info)
+ == VMAT_GATHER_SCATTER)))
+ || (node
+ && ((SLP_TREE_MEMORY_ACCESS_TYPE (node) == VMAT_ELEMENTWISE
+ && (TREE_CODE (DR_STEP (STMT_VINFO_DATA_REF
+ (SLP_TREE_REPRESENTATIVE (node))))
+ != INTEGER_CST))
+ || (SLP_TREE_MEMORY_ACCESS_TYPE (node)
+ == VMAT_GATHER_SCATTER)))))
{
stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
stmt_cost *= (TYPE_VECTOR_SUBPARTS (vectype) + 1);
@@ -25004,6 +25275,10 @@ ix86_vector_costs::add_stmt_cost (int count, vect_cost_for_stmt kind,
if (stmt_cost == -1)
stmt_cost = ix86_builtin_vectorization_cost (kind, vectype, misalign);
+ if (kind == vec_perm && vectype
+ && GET_MODE_SIZE (TYPE_MODE (vectype)) == 32)
+ m_num_avx256_vec_perm[where]++;
+
/* Penalize DFmode vector operations for Bonnell. */
if (TARGET_CPU_P (BONNELL) && kind == vector_stmt
&& vectype && GET_MODE_INNER (TYPE_MODE (vectype)) == DFmode)
@@ -25073,6 +25348,11 @@ ix86_vector_costs::finish_cost (const vector_costs *scalar_costs)
ix86_vect_estimate_reg_pressure ();
+ for (int i = 0; i != 3; i++)
+ if (m_num_avx256_vec_perm[i]
+ && TARGET_AVX256_AVOID_VEC_PERM)
+ m_costs[i] = INT_MAX;
+
vector_costs::finish_cost (scalar_costs);
}
@@ -25544,7 +25824,7 @@ extract_base_offset_in_addr (rtx mem, rtx *base, rtx *symbase, rtx *offset)
gcc_assert (MEM_P (mem));
addr = XEXP (mem, 0);
-
+
if (GET_CODE (addr) == CONST)
addr = XEXP (addr, 0);
@@ -26568,6 +26848,10 @@ ix86_libgcc_floating_mode_supported_p
#undef TARGET_STACK_PROTECT_GUARD
#define TARGET_STACK_PROTECT_GUARD ix86_stack_protect_guard
+#undef TARGET_STACK_PROTECT_RUNTIME_ENABLED_P
+#define TARGET_STACK_PROTECT_RUNTIME_ENABLED_P \
+ ix86_stack_protect_runtime_enabled_p
+
#if !TARGET_MACHO
#undef TARGET_STACK_PROTECT_FAIL
#define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
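
The TARGET_STACK_PROTECT_RUNTIME_ENABLED_P hook defined and registered above
turns off stack-protector instrumentation for naked functions, which have no
prologue or epilogue in which to set up or verify a guard; the affected case
looks like:

    /* Hypothetical example: with -fstack-protector-all this function
       no longer receives guard code.  */
    __attribute__ ((naked))
    void
    naked_fn (void)
    {
      __asm__ ("ret");
    }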
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index eabb324..5193440 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -238,7 +238,7 @@ extern const struct processor_costs *ix86_cost;
extern const struct processor_costs ix86_size_cost;
#define ix86_cur_cost() \
- (optimize_insn_for_size_p () ? &ix86_size_cost: ix86_cost)
+ (optimize_insn_for_size_p () ? &ix86_size_cost : ix86_cost)
/* Macros used in the machine description to test the flags. */
@@ -430,6 +430,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS]
#define TARGET_FUSE_ALU_AND_BRANCH \
ix86_tune_features[X86_TUNE_FUSE_ALU_AND_BRANCH]
+#define TARGET_FUSE_MOV_AND_ALU \
+ ix86_tune_features[X86_TUNE_FUSE_MOV_AND_ALU]
#define TARGET_OPT_AGU ix86_tune_features[X86_TUNE_OPT_AGU]
#define TARGET_AVOID_LEA_FOR_ADDR \
ix86_tune_features[X86_TUNE_AVOID_LEA_FOR_ADDR]
@@ -437,6 +439,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL]
#define TARGET_AVX256_SPLIT_REGS \
ix86_tune_features[X86_TUNE_AVX256_SPLIT_REGS]
+#define TARGET_AVX256_AVOID_VEC_PERM \
+ ix86_tune_features[X86_TUNE_AVX256_AVOID_VEC_PERM]
#define TARGET_AVX512_SPLIT_REGS \
ix86_tune_features[X86_TUNE_AVX512_SPLIT_REGS]
#define TARGET_GENERAL_REGS_SSE_SPILL \
@@ -460,6 +464,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_DEST_FALSE_DEP_FOR_GLC]
#define TARGET_SLOW_STC ix86_tune_features[X86_TUNE_SLOW_STC]
#define TARGET_USE_RCR ix86_tune_features[X86_TUNE_USE_RCR]
+#define TARGET_SSE_MOVCC_USE_BLENDV \
+ ix86_tune_features[X86_TUNE_SSE_MOVCC_USE_BLENDV]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
@@ -539,7 +545,7 @@ extern GTY(()) tree x86_mfence;
#define TARGET_SUBTARGET64_ISA_DEFAULT \
(OPTION_MASK_ISA_MMX | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_SSE2)
-/* Replace MACH-O, ifdefs by in-line tests, where possible.
+/* Replace MACH-O, ifdefs by in-line tests, where possible.
(a) Macros defined in config/i386/darwin.h */
#define TARGET_MACHO 0
#define TARGET_MACHO_SYMBOL_STUBS 0
@@ -897,7 +903,10 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
and give entire struct the alignment of an int. */
/* Required on the 386 since it doesn't have bit-field insns. */
#define PCC_BITFIELD_TYPE_MATTERS 1
-
+
+#define VECTOR_STORE_FLAG_VALUE(MODE) \
+ (GET_MODE_CLASS (MODE) == MODE_VECTOR_INT ? constm1_rtx : NULL_RTX)
+
/* Standard register usage. */
/* This processor has special stack-like registers. See reg-stack.cc
@@ -1565,11 +1574,11 @@ enum reg_class
/* If defined, the maximum amount of space required for outgoing arguments
will be computed and placed into the variable `crtl->outgoing_args_size'.
No space will be pushed onto the stack for each call; instead, the
- function prologue should increase the stack frame size by this amount.
+ function prologue should increase the stack frame size by this amount.
In 32bit mode enabling argument accumulation results in about 5% code size
growth because move instructions are less compact than push. In 64bit
- mode the difference is less drastic but visible.
+ mode the difference is less drastic but visible.
FIXME: Unlike earlier implementations, the size of unwind info seems to
actually grow with accumulation. Is that because accumulated args
@@ -2089,9 +2098,9 @@ do { \
#define DEBUGGER_REGNO(N) \
(TARGET_64BIT ? debugger64_register_map[(N)] : debugger_register_map[(N)])
-extern int const debugger_register_map[FIRST_PSEUDO_REGISTER];
-extern int const debugger64_register_map[FIRST_PSEUDO_REGISTER];
-extern int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER];
+extern unsigned int const debugger_register_map[FIRST_PSEUDO_REGISTER];
+extern unsigned int const debugger64_register_map[FIRST_PSEUDO_REGISTER];
+extern unsigned int const svr4_debugger_register_map[FIRST_PSEUDO_REGISTER];
/* Before the prologue, RA is at 0(%esp). */
#define INCOMING_RETURN_ADDR_RTX \
@@ -2424,6 +2433,18 @@ constexpr wide_int_bitmask PTA_CLEARWATERFOREST = PTA_SIERRAFOREST
| PTA_AVXVNNIINT16 | PTA_SHA512 | PTA_SM3 | PTA_SM4 | PTA_USER_MSR
| PTA_PREFETCHI;
constexpr wide_int_bitmask PTA_PANTHERLAKE = PTA_ARROWLAKE_S | PTA_PREFETCHI;
+
+constexpr wide_int_bitmask PTA_BDVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3
+ | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_FMA4
+ | PTA_XOP | PTA_LWP | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE;
+constexpr wide_int_bitmask PTA_BDVER2 = PTA_BDVER1 | PTA_BMI | PTA_TBM
+ | PTA_F16C | PTA_FMA;
+constexpr wide_int_bitmask PTA_BDVER3 = PTA_BDVER2 | PTA_XSAVEOPT
+ | PTA_FSGSBASE;
+constexpr wide_int_bitmask PTA_BDVER4 = PTA_BDVER3 | PTA_AVX2 | PTA_BMI2
+ | PTA_RDRND | PTA_MOVBE | PTA_MWAITX;
+
constexpr wide_int_bitmask PTA_ZNVER1 = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
| PTA_SSE3 | PTA_SSE4A | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
| PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_AVX2 | PTA_BMI | PTA_BMI2
@@ -2441,13 +2462,21 @@ constexpr wide_int_bitmask PTA_ZNVER4 = PTA_ZNVER3 | PTA_AVX512F | PTA_AVX512DQ
| PTA_AVX512VNNI | PTA_AVX512BITALG | PTA_AVX512VPOPCNTDQ | PTA_EVEX512;
constexpr wide_int_bitmask PTA_ZNVER5 = PTA_ZNVER4 | PTA_AVXVNNI
| PTA_MOVDIRI | PTA_MOVDIR64B | PTA_AVX512VP2INTERSECT | PTA_PREFETCHI;
-constexpr wide_int_bitmask PTA_LUJIAZUI = PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2
- | PTA_SSE3 | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_AES
- | PTA_PCLMUL | PTA_BMI | PTA_BMI2 | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT
- | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE | PTA_ADX | PTA_RDSEED | PTA_POPCNT;
-constexpr wide_int_bitmask PTA_YONGFENG = PTA_LUJIAZUI | PTA_AVX | PTA_AVX2 | PTA_F16C
- | PTA_FMA | PTA_SHA | PTA_LZCNT;
+constexpr wide_int_bitmask PTA_BTVER1 = PTA_64BIT | PTA_MMX | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3 | PTA_SSE4A | PTA_ABM | PTA_CX16
+ | PTA_PRFCHW | PTA_FXSR | PTA_XSAVE;
+constexpr wide_int_bitmask PTA_BTVER2 = PTA_BTVER1 | PTA_SSE4_1 | PTA_SSE4_2
+ | PTA_AES | PTA_PCLMUL | PTA_AVX | PTA_BMI | PTA_F16C | PTA_MOVBE
+ | PTA_XSAVEOPT;
+
+constexpr wide_int_bitmask PTA_LUJIAZUI = PTA_64BIT | PTA_MMX | PTA_SSE
+ | PTA_SSE2 | PTA_SSE3 | PTA_CX16 | PTA_ABM | PTA_SSSE3 | PTA_SSE4_1
+ | PTA_SSE4_2 | PTA_AES | PTA_PCLMUL | PTA_BMI | PTA_BMI2 | PTA_PRFCHW
+ | PTA_FXSR | PTA_XSAVE | PTA_XSAVEOPT | PTA_FSGSBASE | PTA_RDRND | PTA_MOVBE
+ | PTA_ADX | PTA_RDSEED | PTA_POPCNT;
+constexpr wide_int_bitmask PTA_YONGFENG = PTA_LUJIAZUI | PTA_AVX | PTA_AVX2
+ | PTA_F16C | PTA_FMA | PTA_SHA | PTA_LZCNT;
#ifndef GENERATOR_FILE
@@ -2510,8 +2539,7 @@ enum ix86_fpcmp_strategy {
enum ix86_stack_slot
{
- SLOT_TEMP = 0,
- SLOT_CW_STORED,
+ SLOT_CW_STORED = 0,
SLOT_CW_ROUNDEVEN,
SLOT_CW_TRUNC,
SLOT_CW_FLOOR,
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index b56a51b..effab29 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -117,6 +117,8 @@
UNSPEC_STC
UNSPEC_PUSHFL
UNSPEC_POPFL
+ UNSPEC_OPTCOMX
+ UNSPEC_SETCC_SI_SLP
;; For SSE/MMX support:
UNSPEC_FIX_NOTRUNC
@@ -362,6 +364,9 @@
;; For AMX-TILE
UNSPECV_LDTILECFG
UNSPECV_STTILECFG
+
+ ;; For MOVRS support
+ UNSPECV_MOVRS
])
;; Constants to represent rounding modes in the ROUND instruction
@@ -538,10 +543,10 @@
str,bitmanip,
fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
fxch,fistp,fisttp,frndint,
- sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
+ sse,ssemov,ssemov2,sseadd,sseadd1,sseiadd,sseiadd1,
ssemul,sseimul,ssediv,sselog,sselog1,
sseishft,sseishft1,ssecmp,ssecomi,
- ssecvt,ssecvt1,sseicvt,sseins,
+ ssecvt,ssecvt1,sseicvt,sseicvt2,sseins,
sseshuf,sseshuf1,ssemuladd,sse4arg,
lwp,mskmov,msklog,
mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft"
@@ -559,10 +564,10 @@
(cond [(eq_attr "type" "fmov,fop,fsgn,fmul,fdiv,fpspc,fcmov,fcmp,
fxch,fistp,fisttp,frndint")
(const_string "i387")
- (eq_attr "type" "sse,ssemov,sseadd,sseadd1,sseiadd,sseiadd1,
+ (eq_attr "type" "sse,ssemov,ssemov2,sseadd,sseadd1,sseiadd,sseiadd1,
ssemul,sseimul,ssediv,sselog,sselog1,
sseishft,sseishft1,ssecmp,ssecomi,
- ssecvt,ssecvt1,sseicvt,sseins,
+ ssecvt,ssecvt1,sseicvt,sseicvt2,sseins,
sseshuf,sseshuf1,ssemuladd,sse4arg,mskmov")
(const_string "sse")
(eq_attr "type" "mmx,mmxmov,mmxadd,mmxmul,mmxcmp,mmxcvt,mmxshft")
@@ -857,6 +862,9 @@
mmx,mmxmov,mmxcmp,mmxcvt,mskmov,msklog")
(match_operand 2 "memory_operand"))
(const_string "load")
+ (and (eq_attr "type" "ssemov2,sseicvt2")
+ (match_operand 2 "memory_operand"))
+ (const_string "load")
(and (eq_attr "type" "icmov,ssemuladd,sse4arg")
(match_operand 3 "memory_operand"))
(const_string "load")
@@ -1736,7 +1744,7 @@
(compare:CC (match_operand:XF 1 "nonmemory_operand")
(match_operand:XF 2 "nonmemory_operand")))
(set (pc) (if_then_else
- (match_operator 0 "ix86_fp_comparison_operator"
+ (match_operator 0 "ix86_fp_comparison_operator_xf"
[(reg:CC FLAGS_REG)
(const_int 0)])
(label_ref (match_operand 3))
@@ -1753,7 +1761,7 @@
(compare:CC (match_operand:XF 2 "nonmemory_operand")
(match_operand:XF 3 "nonmemory_operand")))
(set (match_operand:QI 0 "register_operand")
- (match_operator 1 "ix86_fp_comparison_operator"
+ (match_operator 1 "ix86_fp_comparison_operator_xf"
[(reg:CC FLAGS_REG)
(const_int 0)]))]
"TARGET_80387"
@@ -2017,6 +2025,32 @@
(set_attr "bdver1_decode" "double")
(set_attr "znver1_decode" "double")])
+(define_insn "*cmpx<unord><MODEF:mode>"
+ [(set (reg:CCFP FLAGS_REG)
+ (unspec:CCFP [
+ (compare:CCFP
+ (match_operand:MODEF 0 "register_operand" "v")
+ (match_operand:MODEF 1 "nonimmediate_operand" "vm"))]
+ UNSPEC_OPTCOMX))]
+ "TARGET_AVX10_2_256"
+ "%v<unord>comx<MODEF:ssemodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "<MODEF:MODE>")])
+
+(define_insn "*cmpx<unord>hf"
+ [(set (reg:CCFP FLAGS_REG)
+ (unspec:CCFP [
+ (compare:CCFP
+ (match_operand:HF 0 "register_operand" "v")
+ (match_operand:HF 1 "nonimmediate_operand" "vm"))]
+ UNSPEC_OPTCOMX))]
+ "TARGET_AVX10_2_256"
+ "v<unord>comxsh\t{%1, %0|%0, %1}"
+ [(set_attr "type" "ssecomi")
+ (set_attr "prefix" "evex")
+ (set_attr "mode" "HF")])
+
(define_insn "*cmpi<unord><MODEF:mode>"
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP
@@ -3658,7 +3692,7 @@
[(set (match_operand:TI 0 "nonimmediate_operand" "=ro,r,r,&r")
(any_or_plus:TI
(and:TI
- (match_operand:TI 1 "nonimmediate_operand" "r,m,r,m")
+ (match_operand:TI 1 "nonimmediate_operand" "r,o,r,o")
(match_operand:TI 3 "const_scalar_int_operand" "n,n,n,n"))
(zero_extend:TI
(match_operand:DI 2 "nonimmediate_operand" "r,r,m,m"))))]
@@ -3680,7 +3714,7 @@
[(set (match_operand:DI 0 "nonimmediate_operand" "=ro,r,r,&r")
(any_or_plus:DI
(and:DI
- (match_operand:DI 1 "nonimmediate_operand" "r,m,r,m")
+ (match_operand:DI 1 "nonimmediate_operand" "r,o,r,o")
(match_operand:DI 3 "const_int_operand" "n,n,n,n"))
(zero_extend:DI
(match_operand:SI 2 "nonimmediate_operand" "r,r,m,m"))))]
@@ -5642,16 +5676,18 @@
(set_attr "mode" "HF")])
(define_insn "truncsfbf2"
- [(set (match_operand:BF 0 "register_operand" "=x, v")
+ [(set (match_operand:BF 0 "register_operand" "=x,x,v,Yv")
(float_truncate:BF
- (match_operand:SF 1 "register_operand" "x,v")))]
- "((TARGET_AVX512BF16 && TARGET_AVX512VL) || TARGET_AVXNECONVERT)
- && !HONOR_NANS (BFmode) && flag_unsafe_math_optimizations"
+ (match_operand:SF 1 "register_operand" "0,x,v,Yv")))]
+ "TARGET_SSE2 && flag_unsafe_math_optimizations && !HONOR_NANS (BFmode)"
"@
+ psrld\t{$16, %0|%0, 16}
%{vex%} vcvtneps2bf16\t{%1, %0|%0, %1}
- vcvtneps2bf16\t{%1, %0|%0, %1}"
- [(set_attr "isa" "avxneconvert,avx512bf16vl")
- (set_attr "prefix" "vex,evex")])
+ vcvtneps2bf16\t{%1, %0|%0, %1}
+ vpsrld\t{$16, %1, %0|%0, %1, 16}"
+ [(set_attr "isa" "noavx,avxneconvert,avx512bf16vl,avx")
+ (set_attr "prefix" "orig,vex,evex,vex")
+ (set_attr "type" "sseishft1,ssecvt,ssecvt,sseishft1")])
;; Signed conversion to DImode.
@@ -6249,7 +6285,7 @@
{
emit_insn (gen_floatunssi<mode>2_i387_with_xmm
(operands[0], operands[1],
- assign_386_stack_local (DImode, SLOT_TEMP)));
+ assign_stack_temp (DImode, GET_MODE_SIZE (DImode))));
DONE;
}
if (!TARGET_AVX512F)
@@ -6434,7 +6470,7 @@
(plus:<DWI>
(zero_extend:<DWI>
(match_operand:DWIH 2 "nonimmediate_operand" "rm,r,rm,r"))
- (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,m")))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "0,0,r,o")))
(clobber (reg:CC FLAGS_REG))]
"ix86_binary_operator_ok (UNKNOWN, <DWI>mode, operands, TARGET_APX_NDD)"
"#"
@@ -7676,7 +7712,7 @@
(eq:CCO
(plus:<QPWI>
(sign_extend:<QPWI>
- (match_operand:<DWI> 1 "nonimmediate_operand" "%0,rjM"))
+ (match_operand:<DWI> 1 "nonimmediate_operand" "%0,rjO"))
(match_operand:<QPWI> 3 "const_scalar_int_operand" "n,n"))
(sign_extend:<QPWI>
(plus:<DWI>
@@ -19251,6 +19287,27 @@
[(set_attr "type" "setcc")
(set_attr "mode" "QI")])
+(define_expand "setcc_si_slp"
+ [(set (match_operand:SI 0 "register_operand")
+ (unspec:SI
+ [(match_operand:QI 1)
+ (match_operand:SI 2 "register_operand")] UNSPEC_SETCC_SI_SLP))])
+
+(define_insn_and_split "*setcc_si_slp"
+ [(set (match_operand:SI 0 "register_operand" "=q")
+ (unspec:SI
+ [(match_operator:QI 1 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)])
+ (match_operand:SI 2 "register_operand" "0")] UNSPEC_SETCC_SI_SLP))]
+ "ix86_pre_reload_split ()"
+ "#"
+ "&& 1"
+ [(set (match_dup 0) (match_dup 2))
+ (set (strict_low_part (match_dup 3)) (match_dup 1))]
+{
+ operands[3] = gen_lowpart (QImode, operands[0]);
+})
+
;; In general it is not safe to assume too much about CCmode registers,
;; so simplify-rtx stops when it sees a second one. Under certain
;; conditions this is safe on x86, so help combine not create
@@ -19746,6 +19803,32 @@
operands[8] = gen_lowpart (QImode, operands[4]);
ix86_expand_clear (operands[4]);
})
+
+(define_peephole2
+ [(set (match_operand 4 "flags_reg_operand") (match_operand 0))
+ (set (strict_low_part (match_operand:QI 5 "register_operand"))
+ (match_operator:QI 6 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (set (match_operand:QI 1 "register_operand")
+ (match_operator:QI 2 "ix86_comparison_operator"
+ [(reg FLAGS_REG) (const_int 0)]))
+ (set (match_operand 3 "any_QIreg_operand")
+ (zero_extend (match_dup 1)))]
+ "(peep2_reg_dead_p (4, operands[1])
+ || operands_match_p (operands[1], operands[3]))
+ && ! reg_overlap_mentioned_p (operands[3], operands[0])
+ && ! reg_overlap_mentioned_p (operands[3], operands[5])
+ && ! reg_overlap_mentioned_p (operands[1], operands[5])
+ && peep2_regno_dead_p (0, FLAGS_REG)"
+ [(set (match_dup 4) (match_dup 0))
+ (set (strict_low_part (match_dup 5))
+ (match_dup 6))
+ (set (strict_low_part (match_dup 7))
+ (match_dup 2))]
+{
+ operands[7] = gen_lowpart (QImode, operands[3]);
+ ix86_expand_clear (operands[3]);
+})
;; Call instructions.
@@ -28448,19 +28531,21 @@
[(prefetch (match_operand 0 "address_operand")
(match_operand:SI 1 "const_int_operand")
(match_operand:SI 2 "const_int_operand"))]
- "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW"
+ "TARGET_3DNOW || TARGET_PREFETCH_SSE || TARGET_PRFCHW
+ || TARGET_MOVRS"
{
- bool write = operands[1] != const0_rtx;
+ int write = INTVAL (operands[1]);
int locality = INTVAL (operands[2]);
gcc_assert (IN_RANGE (locality, 0, 3));
+ gcc_assert (IN_RANGE (write, 0, 2));
/* Use 3dNOW prefetch in case we are asking for write prefetch not
supported by SSE counterpart (non-SSE2 athlon machines) or the
SSE prefetch is not available (K6 machines). Otherwise use SSE
prefetch as it allows specifying of locality. */
- if (write)
+ if (write == 1)
{
if (TARGET_PRFCHW)
operands[2] = GEN_INT (3);
@@ -28468,11 +28553,24 @@
operands[2] = GEN_INT (3);
else if (TARGET_PREFETCH_SSE)
operands[1] = const0_rtx;
- else
+ else if (write == 0)
{
gcc_assert (TARGET_3DNOW);
operands[2] = GEN_INT (3);
}
+ else
+ {
+ if (TARGET_MOVRS)
+ ;
+ else if (TARGET_PREFETCH_SSE)
+ operands[1] = const0_rtx;
+ else
+ {
+ gcc_assert (TARGET_3DNOW);
+ operands[1] = const0_rtx;
+ operands[2] = GEN_INT (3);
+ }
+ }
}
else
{
@@ -28543,6 +28641,18 @@
(symbol_ref "memory_address_length (operands[0], false)"))
(set_attr "memory" "none")])
+(define_insn "*prefetch_rst2"
+ [(prefetch (match_operand 0 "address_operand" "p")
+ (const_int 2)
+ (const_int 1))]
+ "TARGET_MOVRS"
+ "prefetchrst2\t%a0"
+ [(set_attr "type" "sse")
+ (set_attr "atom_sse_attr" "prefetch")
+ (set (attr "length_address")
+ (symbol_ref "memory_address_length (operands[0], false)"))
+ (set_attr "memory" "none")])
+
(define_insn "sse4_2_crc32<mode>"
[(set (match_operand:SI 0 "register_operand" "=r")
(unspec:SI
@@ -29464,24 +29574,40 @@
(set_attr "length" "4")])
;; Spaceship optimization
-(define_expand "spaceship<mode>3"
+(define_expand "spaceship<mode>4"
[(match_operand:SI 0 "register_operand")
(match_operand:MODEF 1 "cmp_fp_expander_operand")
- (match_operand:MODEF 2 "cmp_fp_expander_operand")]
+ (match_operand:MODEF 2 "cmp_fp_expander_operand")
+ (match_operand:SI 3 "const_int_operand")]
"(TARGET_80387 || (SSE_FLOAT_MODE_P (<MODE>mode) && TARGET_SSE_MATH))
&& (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
{
- ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
+ ix86_expand_fp_spaceship (operands[0], operands[1], operands[2],
+ operands[3]);
DONE;
})
-(define_expand "spaceshipxf3"
+(define_expand "spaceshipxf4"
[(match_operand:SI 0 "register_operand")
(match_operand:XF 1 "nonmemory_operand")
- (match_operand:XF 2 "nonmemory_operand")]
+ (match_operand:XF 2 "nonmemory_operand")
+ (match_operand:SI 3 "const_int_operand")]
"TARGET_80387 && (TARGET_CMOVE || (TARGET_SAHF && TARGET_USE_SAHF))"
{
- ix86_expand_fp_spaceship (operands[0], operands[1], operands[2]);
+ ix86_expand_fp_spaceship (operands[0], operands[1], operands[2],
+ operands[3]);
+ DONE;
+})
+
+(define_expand "spaceship<mode>4"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:SWI 1 "nonimmediate_operand")
+ (match_operand:SWI 2 "<general_operand>")
+ (match_operand:SI 3 "const_int_operand")]
+ ""
+{
+ ix86_expand_int_spaceship (operands[0], operands[1], operands[2],
+ operands[3]);
DONE;
})
@@ -29575,6 +29701,17 @@
(set_attr "prefix" "maybe_evex")
(set_attr "memory" "store")])
+(define_insn "movrs<mode>"
+ [(set (match_operand:SWI1248x 0 "register_operand" "=r")
+ (unspec_volatile:SWI1248x
+ [(match_operand:SWI1248x 1 "memory_operand" "m")]
+ UNSPECV_MOVRS))]
+ "TARGET_MOVRS && TARGET_64BIT"
+ "movrs<imodesuffix>\t{%1, %0|%0, %1}"
+ [(set_attr "prefix" "orig")
+ (set_attr "type" "other")
+ (set_attr "mode" "<MODE>")])
+
(include "mmx.md")
(include "sse.md")
(include "sync.md")
diff --git a/gcc/config/i386/i386.opt b/gcc/config/i386/i386.opt
index fe16e44..99e86f5 100644
--- a/gcc/config/i386/i386.opt
+++ b/gcc/config/i386/i386.opt
@@ -1313,7 +1313,7 @@ Enable vectorization for scatter instruction.
mapxf
Target Mask(ISA2_APX_F) Var(ix86_isa_flags2) Save
Support code generation for APX features, including EGPR, PUSH2POP2,
-NDD and PPX.
+NDD, PPX, NF, CCMP and ZU.
mapx-features=
Target Undocumented Joined Enum(apx_features) EnumSet Var(ix86_apx_features) Init(apx_none) Save
@@ -1389,3 +1389,28 @@ mavx10.2
Target Alias(mavx10.2-256)
Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2,
AVX10.1 and AVX10.2 built-in functions and code generation.
+
+mamx-avx512
+Target Mask(ISA2_AMX_AVX512) Var(ix86_isa_flags2) Save
+Support MMX, SSE, SSE2, SSE3, SSSE3, SSE4.1, SSE4.2, AVX, AVX2, AVX10.1-512,
+AVX10.2-512 and AMX-AVX512 built-in functions and code generation.
+
+mamx-tf32
+Target Mask(ISA2_AMX_TF32) Var(ix86_isa_flags2) Save
+Support AMX-TF32 built-in functions and code generation.
+
+mamx-transpose
+Target Mask(ISA2_AMX_TRANSPOSE) Var(ix86_isa_flags2) Save
+Support AMX-TRANSPOSE built-in functions and code generation.
+
+mamx-fp8
+Target Mask(ISA2_AMX_FP8) Var(ix86_isa_flags2) Save
+Support AMX-FP8 built-in functions and code generation.
+
+mmovrs
+Target Mask(ISA2_MOVRS) Var(ix86_isa_flags2) Save
+Support MOVRS built-in functions and code generation.
+
+mamx-movrs
+Target Mask(ISA2_AMX_MOVRS) Var(ix86_isa_flags2) Save
+Support AMX-MOVRS built-in functions and code generation.
diff --git a/gcc/config/i386/i386.opt.urls b/gcc/config/i386/i386.opt.urls
index fc70616..dbd59ec 100644
--- a/gcc/config/i386/i386.opt.urls
+++ b/gcc/config/i386/i386.opt.urls
@@ -613,3 +613,21 @@ UrlSuffix(gcc/x86-Options.html#index-mavx10_002e2-512)
mavx10.2
UrlSuffix(gcc/x86-Options.html#index-mavx10_002e2)
+mamx-avx512
+UrlSuffix(gcc/x86-Options.html#index-mamx-avx512)
+
+mamx-tf32
+UrlSuffix(gcc/x86-Options.html#index-mamx-tf32)
+
+mamx-transpose
+UrlSuffix(gcc/x86-Options.html#index-mamx-transpose)
+
+mamx-fp8
+UrlSuffix(gcc/x86-Options.html#index-mamx-fp8)
+
+mmovrs
+UrlSuffix(gcc/x86-Options.html#index-mmovrs)
+
+mamx-movrs
+UrlSuffix(gcc/x86-Options.html#index-mamx-movrs)
+
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index 6b8035e..7e957b8 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -132,6 +132,14 @@
#include <amxcomplexintrin.h>
+#include <amxavx512intrin.h>
+
+#include <amxtf32intrin.h>
+
+#include <amxtransposeintrin.h>
+
+#include <amxfp8intrin.h>
+
#include <prfchwintrin.h>
#include <keylockerintrin.h>
@@ -162,4 +170,7 @@
#include <avx10_2copyintrin.h>
+#include <movrsintrin.h>
+
+#include <amxmovrsintrin.h>
#endif /* _IMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index cb26975..506f4ca 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -70,6 +70,9 @@
;; 8-byte and 4-byte HImode vector modes
(define_mode_iterator VI2_32_64 [(V4HI "TARGET_MMX_WITH_SSE") V2HI])
+;; 8-byte, 4-byte and 2-byte QImode vector modes
+(define_mode_iterator VI1_16_32_64 [(V8QI "TARGET_MMX_WITH_SSE") V4QI V2QI])
+
;; 4-byte and 2-byte integer vector modes
(define_mode_iterator VI_16_32 [V4QI V2QI V2HI])
@@ -121,7 +124,7 @@
;; Mapping of vector float modes to an integer mode of the same size
(define_mode_attr mmxintvecmode
[(V2SF "V2SI") (V2SI "V2SI") (V4HI "V4HI") (V8QI "V8QI")
- (V4HF "V4HI") (V2HF "V2HI")])
+ (V4HF "V4HI") (V2HF "V2HI") (V4BF "V4HI") (V2BF "V2HI")])
(define_mode_attr mmxintvecmodelower
[(V2SF "v2si") (V2SI "v2si") (V4HI "v4hi") (V8QI "v8qi")
@@ -1958,6 +1961,8 @@
(define_mode_iterator VHF_32_64 [V2HF (V4HF "TARGET_MMX_WITH_SSE")])
+(define_mode_iterator VBF_32_64 [V2BF (V4BF "TARGET_MMX_WITH_SSE")])
+
(define_expand "divv4hf3"
[(set (match_operand:V4HF 0 "register_operand")
(div:V4HF
@@ -2036,6 +2041,26 @@
DONE;
})
+;; The AVX10.2 BF16 add/sub/mul/div instructions do not generate floating point exceptions.
+(define_expand "<insn><mode>3"
+ [(set (match_operand:VBF_32_64 0 "register_operand")
+ (plusminusmultdiv:VBF_32_64
+ (match_operand:VBF_32_64 1 "nonimmediate_operand")
+ (match_operand:VBF_32_64 2 "nonimmediate_operand")))]
+ "TARGET_AVX10_2_256"
+{
+ rtx op0 = gen_reg_rtx (V8BFmode);
+ rtx op1 = lowpart_subreg (V8BFmode,
+ force_reg (<MODE>mode, operands[1]), <MODE>mode);
+ rtx op2 = lowpart_subreg (V8BFmode,
+ force_reg (<MODE>mode, operands[2]), <MODE>mode);
+
+ emit_insn (gen_<insn>v8bf3 (op0, op1, op2));
+
+ emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
+ DONE;
+})
+
(define_expand "divv2hf3"
[(set (match_operand:V2HF 0 "register_operand")
(div:V2HF
@@ -2076,6 +2101,25 @@
DONE;
})
+(define_expand "<code><mode>3"
+ [(set (match_operand:VBF_32_64 0 "register_operand")
+ (smaxmin:VBF_32_64
+ (match_operand:VBF_32_64 1 "nonimmediate_operand")
+ (match_operand:VBF_32_64 2 "nonimmediate_operand")))]
+ "TARGET_AVX10_2_256"
+{
+ rtx op0 = gen_reg_rtx (V8BFmode);
+ rtx op1 = lowpart_subreg (V8BFmode,
+ force_reg (<MODE>mode, operands[1]), <MODE>mode);
+ rtx op2 = lowpart_subreg (V8BFmode,
+ force_reg (<MODE>mode, operands[2]), <MODE>mode);
+
+ emit_insn (gen_<code>v8bf3 (op0, op1, op2));
+
+ emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
+ DONE;
+})
+
(define_expand "sqrt<mode>2"
[(set (match_operand:VHF_32_64 0 "register_operand")
(sqrt:VHF_32_64
@@ -2091,18 +2135,37 @@
DONE;
})
+(define_expand "sqrt<mode>2"
+ [(set (match_operand:VBF_32_64 0 "register_operand")
+ (sqrt:VBF_32_64 (match_operand:VBF_32_64 1 "vector_operand")))]
+ "TARGET_AVX10_2_256"
+{
+ rtx op0 = gen_reg_rtx (V8BFmode);
+ rtx op1 = lowpart_subreg (V8BFmode,
+ force_reg (<MODE>mode, operands[1]), <MODE>mode);
+
+ emit_insn (gen_sqrtv8bf2 (op0, op1));
+
+ emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
+ DONE;
+})
+
+(define_mode_iterator VHBF_32_64
+ [V2BF (V4BF "TARGET_MMX_WITH_SSE")
+ V2HF (V4HF "TARGET_MMX_WITH_SSE")])
+
(define_expand "<code><mode>2"
- [(set (match_operand:VHF_32_64 0 "register_operand")
- (absneg:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand")))]
+ [(set (match_operand:VHBF_32_64 0 "register_operand")
+ (absneg:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand")))]
"TARGET_SSE"
"ix86_expand_fp_absneg_operator (<CODE>, <MODE>mode, operands); DONE;")
(define_insn_and_split "*mmx_<code><mode>"
- [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
- (absneg:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand" "0,x,x")))
- (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
+ [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x,x")
+ (absneg:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand" "0,x,x")))
+ (use (match_operand:VHBF_32_64 2 "register_operand" "x,0,x"))]
"TARGET_SSE"
"#"
"&& reload_completed"
@@ -2115,11 +2178,11 @@
[(set_attr "isa" "noavx,noavx,avx")])
(define_insn_and_split "*mmx_nabs<mode>2"
- [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x,x")
- (neg:VHF_32_64
- (abs:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand" "0,x,x"))))
- (use (match_operand:VHF_32_64 2 "register_operand" "x,0,x"))]
+ [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x,x")
+ (neg:VHBF_32_64
+ (abs:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand" "0,x,x"))))
+ (use (match_operand:VHBF_32_64 2 "register_operand" "x,0,x"))]
"TARGET_SSE"
"#"
"&& reload_completed"
@@ -2230,6 +2293,23 @@
DONE;
})
+;; This instruction does not generate floating point exceptions.
+(define_expand "vec_cmp<mode>qi"
+ [(set (match_operand:QI 0 "register_operand")
+ (match_operator:QI 1 ""
+ [(match_operand:VBF_32_64 2 "register_operand")
+ (match_operand:VBF_32_64 3 "nonimmediate_operand")]))]
+ "TARGET_AVX10_2_256"
+{
+ rtx op2 = lowpart_subreg (V8BFmode,
+ force_reg (<MODE>mode, operands[2]), <MODE>mode);
+ rtx op3 = lowpart_subreg (V8BFmode,
+ force_reg (<MODE>mode, operands[3]), <MODE>mode);
+
+ emit_insn (gen_vec_cmpv8bfqi (operands[0], operands[1], op2, op3));
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel half-precision floating point rounding operations.
@@ -2410,11 +2490,11 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
(define_insn "*mmx_andnot<mode>3"
- [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x")
- (and:VHF_32_64
- (not:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand" "0,x"))
- (match_operand:VHF_32_64 2 "register_operand" "x,x")))]
+ [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x")
+ (and:VHBF_32_64
+ (not:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand" "0,x"))
+ (match_operand:VHBF_32_64 2 "register_operand" "x,x")))]
"TARGET_SSE"
"@
andnps\t{%2, %0|%0, %2}
@@ -2425,10 +2505,10 @@
(set_attr "mode" "V4SF")])
(define_insn "<code><mode>3"
- [(set (match_operand:VHF_32_64 0 "register_operand" "=x,x")
- (any_logic:VHF_32_64
- (match_operand:VHF_32_64 1 "register_operand" "%0,x")
- (match_operand:VHF_32_64 2 "register_operand" " x,x")))]
+ [(set (match_operand:VHBF_32_64 0 "register_operand" "=x,x")
+ (any_logic:VHBF_32_64
+ (match_operand:VHBF_32_64 1 "register_operand" "%0,x")
+ (match_operand:VHBF_32_64 2 "register_operand" " x,x")))]
"TARGET_SSE"
"@
<logic>ps\t{%2, %0|%0, %2}
@@ -2440,14 +2520,14 @@
(define_expand "copysign<mode>3"
[(set (match_dup 4)
- (and:VHF_32_64
- (not:VHF_32_64 (match_dup 3))
- (match_operand:VHF_32_64 1 "register_operand")))
+ (and:VHBF_32_64
+ (not:VHBF_32_64 (match_dup 3))
+ (match_operand:VHBF_32_64 1 "register_operand")))
(set (match_dup 5)
- (and:VHF_32_64 (match_dup 3)
- (match_operand:VHF_32_64 2 "register_operand")))
- (set (match_operand:VHF_32_64 0 "register_operand")
- (ior:VHF_32_64 (match_dup 4) (match_dup 5)))]
+ (and:VHBF_32_64 (match_dup 3)
+ (match_operand:VHBF_32_64 2 "register_operand")))
+ (set (match_operand:VHBF_32_64 0 "register_operand")
+ (ior:VHBF_32_64 (match_dup 4) (match_dup 5)))]
"TARGET_SSE"
{
operands[3] = ix86_build_signbit_mask (<MODE>mode, true, false);
@@ -2458,11 +2538,11 @@
(define_expand "xorsign<mode>3"
[(set (match_dup 4)
- (and:VHF_32_64 (match_dup 3)
- (match_operand:VHF_32_64 2 "register_operand")))
- (set (match_operand:VHF_32_64 0 "register_operand")
- (xor:VHF_32_64 (match_dup 4)
- (match_operand:VHF_32_64 1 "register_operand")))]
+ (and:VHBF_32_64 (match_dup 3)
+ (match_operand:VHBF_32_64 2 "register_operand")))
+ (set (match_operand:VHBF_32_64 0 "register_operand")
+ (xor:VHBF_32_64 (match_dup 4)
+ (match_operand:VHBF_32_64 1 "register_operand")))]
"TARGET_SSE"
{
operands[3] = ix86_build_signbit_mask (<MODE>mode, true, false);
@@ -2474,7 +2554,7 @@
[(set (match_operand:<mmxintvecmode> 0 "register_operand")
(lshiftrt:<mmxintvecmode>
(subreg:<mmxintvecmode>
- (match_operand:VHF_32_64 1 "register_operand") 0)
+ (match_operand:VHBF_32_64 1 "register_operand") 0)
(match_dup 2)))]
"TARGET_SSE2"
{
@@ -2632,6 +2712,86 @@
DONE;
})
+(define_expand "fma<mode>4"
+ [(set (match_operand:VBF_32_64 0 "register_operand")
+ (fma:VBF_32_64
+ (match_operand:VBF_32_64 1 "nonimmediate_operand")
+ (match_operand:VBF_32_64 2 "nonimmediate_operand")
+ (match_operand:VBF_32_64 3 "nonimmediate_operand")))]
+ "TARGET_AVX10_2_256"
+{
+ rtx op0 = gen_reg_rtx (V8BFmode);
+ rtx op1 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[1]), <MODE>mode);
+ rtx op2 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[2]), <MODE>mode);
+ rtx op3 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[3]), <MODE>mode);
+
+ emit_insn (gen_fmav8bf4 (op0, op1, op2, op3));
+
+ emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
+ DONE;
+})
+
+(define_expand "fms<mode>4"
+ [(set (match_operand:VBF_32_64 0 "register_operand")
+ (fma:VBF_32_64
+ (match_operand:VBF_32_64 1 "nonimmediate_operand")
+ (match_operand:VBF_32_64 2 "nonimmediate_operand")
+ (neg:VBF_32_64
+ (match_operand:VBF_32_64 3 "nonimmediate_operand"))))]
+ "TARGET_AVX10_2_256"
+{
+ rtx op0 = gen_reg_rtx (V8BFmode);
+ rtx op1 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[1]), <MODE>mode);
+ rtx op2 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[2]), <MODE>mode);
+ rtx op3 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[3]), <MODE>mode);
+
+ emit_insn (gen_fmsv8bf4 (op0, op1, op2, op3));
+
+ emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
+ DONE;
+})
+
+(define_expand "fnma<mode>4"
+ [(set (match_operand:VBF_32_64 0 "register_operand")
+ (fma:VBF_32_64
+ (neg:VBF_32_64
+ (match_operand:VBF_32_64 1 "nonimmediate_operand"))
+ (match_operand:VBF_32_64 2 "nonimmediate_operand")
+ (match_operand:VBF_32_64 3 "nonimmediate_operand")))]
+ "TARGET_AVX10_2_256"
+{
+ rtx op0 = gen_reg_rtx (V8BFmode);
+ rtx op1 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[1]), <MODE>mode);
+ rtx op2 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[2]), <MODE>mode);
+ rtx op3 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[3]), <MODE>mode);
+
+ emit_insn (gen_fnmav8bf4 (op0, op1, op2, op3));
+
+ emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
+ DONE;
+})
+
+(define_expand "fnms<mode>4"
+ [(set (match_operand:VBF_32_64 0 "register_operand")
+ (fma:VBF_32_64
+ (neg:VBF_32_64
+ (match_operand:VBF_32_64 1 "nonimmediate_operand"))
+ (match_operand:VBF_32_64 2 "nonimmediate_operand")
+ (neg:VBF_32_64
+ (match_operand:VBF_32_64 3 "nonimmediate_operand"))))]
+ "TARGET_AVX10_2_256"
+{
+ rtx op0 = gen_reg_rtx (V8BFmode);
+ rtx op1 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[1]), <MODE>mode);
+ rtx op2 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[2]), <MODE>mode);
+ rtx op3 = lowpart_subreg (V8BFmode, force_reg (<MODE>mode, operands[3]), <MODE>mode);
+
+ emit_insn (gen_fnmsv8bf4 (op0, op1, op2, op3));
+
+ emit_move_insn (operands[0], lowpart_subreg (<MODE>mode, op0, V8BFmode));
+ DONE;
+})
+
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
;; Parallel half-precision floating point complex type operations
@@ -3061,7 +3221,7 @@
(set_attr "type" "mmxadd,sseadd,sseadd")
(set_attr "mode" "DI,TI,TI")])
-(define_insn "*<insn><mode>3"
+(define_insn "<insn><mode>3"
[(set (match_operand:VI_16_32 0 "register_operand" "=x,Yw")
(sat_plusminus:VI_16_32
(match_operand:VI_16_32 1 "register_operand" "<comm>0,Yw")
@@ -4307,6 +4467,13 @@
operands[0] = lowpart_subreg (V16QImode, operands[0], <MODE>mode);
})
+(define_expand "andn<mode>3"
+ [(set (match_operand:MMXMODEI 0 "register_operand")
+ (and:MMXMODEI
+ (not:MMXMODEI (match_operand:MMXMODEI 2 "register_operand"))
+ (match_operand:MMXMODEI 1 "register_operand")))]
+ "TARGET_MMX_WITH_SSE")
+
(define_insn "mmx_andnot<mode>3"
[(set (match_operand:MMXMODEI 0 "register_operand" "=y,x,x,v")
(and:MMXMODEI
@@ -6344,7 +6511,7 @@
DONE;
})
-(define_expand "usdot_prodv8qi"
+(define_expand "usdot_prodv2siv8qi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6363,7 +6530,7 @@
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_usdot_prodv16qi (op0, op1, op2, op3));
+ emit_insn (gen_usdot_prodv4siv16qi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
}
else
@@ -6377,7 +6544,7 @@
emit_move_insn (op3, CONST0_RTX (V4SImode));
emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
/* vec_perm (op0, 2, 3, 0, 1); */
emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
@@ -6388,7 +6555,7 @@
DONE;
})
-(define_expand "sdot_prodv8qi"
+(define_expand "sdot_prodv2siv8qi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6406,7 +6573,7 @@
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_sdot_prodv16qi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv16qi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
}
else
@@ -6420,7 +6587,7 @@
emit_move_insn (op3, CONST0_RTX (V4SImode));
emit_insn (gen_extendv8qiv8hi2 (op1, operands[1]));
emit_insn (gen_extendv8qiv8hi2 (op2, operands[2]));
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
/* vec_perm (op0, 2, 3, 0, 1); */
emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
@@ -6432,7 +6599,7 @@
})
-(define_expand "udot_prodv8qi"
+(define_expand "udot_prodv2siv8qi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V8QI 1 "register_operand")
(match_operand:V8QI 2 "register_operand")
@@ -6450,7 +6617,7 @@
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_udot_prodv16qi (op0, op1, op2, op3));
+ emit_insn (gen_udot_prodv4siv16qi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
}
else
@@ -6464,7 +6631,7 @@
emit_move_insn (op3, CONST0_RTX (V4SImode));
emit_insn (gen_zero_extendv8qiv8hi2 (op1, operands[1]));
emit_insn (gen_zero_extendv8qiv8hi2 (op2, operands[2]));
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
/* vec_perm (op0, 2, 3, 0, 1); */
emit_insn (gen_sse2_pshufd (op0_1, op0, GEN_INT (78)));
@@ -6476,7 +6643,7 @@
})
-(define_expand "usdot_prodv4hi"
+(define_expand "usdot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(match_operand:V4HI 2 "register_operand")
@@ -6492,12 +6659,12 @@
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_usdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_usdot_prodv4siv8hi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
DONE;
})
-(define_expand "udot_prodv4hi"
+(define_expand "udot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(match_operand:V4HI 2 "register_operand")
@@ -6513,12 +6680,12 @@
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_udot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_udot_prodv4siv8hi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
DONE;
})
-(define_expand "sdot_prodv4hi"
+(define_expand "sdot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand")
(match_operand:V4HI 1 "register_operand")
(match_operand:V4HI 2 "register_operand")
@@ -6534,7 +6701,7 @@
rtx op3 = lowpart_subreg (V4SImode, operands[3], V2SImode);
rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_sdot_prodv8hi (op0, op1, op2, op3));
+ emit_insn (gen_sdot_prodv4siv8hi (op0, op1, op2, op3));
emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
DONE;
})
@@ -6646,3 +6813,24 @@
[(set_attr "type" "mmx")
(set_attr "modrm" "0")
(set_attr "memory" "none")])
+
+(define_insn "popcount<mode>2"
+ [(set (match_operand:VI1_16_32_64 0 "register_operand" "=v")
+ (popcount:VI1_16_32_64
+ (match_operand:VI1_16_32_64 1 "register_operand" "v")))]
+ "TARGET_AVX512VL && TARGET_AVX512BITALG"
+ "vpopcntb\t{%1, %0|%0, %1}")
+
+(define_insn "popcount<mode>2"
+ [(set (match_operand:VI2_32_64 0 "register_operand" "=v")
+ (popcount:VI2_32_64
+ (match_operand:VI2_32_64 1 "register_operand" "v")))]
+ "TARGET_AVX512VL && TARGET_AVX512BITALG"
+ "vpopcntw\t{%1, %0|%0, %1}")
+
+(define_insn "popcountv2si2"
+ [(set (match_operand:V2SI 0 "register_operand" "=v")
+ (popcount:V2SI
+ (match_operand:V2SI 1 "register_operand" "v")))]
+ "TARGET_AVX512VPOPCNTDQ && TARGET_AVX512VL && TARGET_MMX_WITH_SSE"
+ "vpopcntd\t{%1, %0|%0, %1}")
diff --git a/gcc/config/i386/movrsintrin.h b/gcc/config/i386/movrsintrin.h
new file mode 100644
index 0000000..b89ce1c
--- /dev/null
+++ b/gcc/config/i386/movrsintrin.h
@@ -0,0 +1,453 @@
+/* Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify
+ it under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU General Public License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#if !defined _IMMINTRIN_H_INCLUDED
+# error "Never use <movrsintrin.h> directly; include <immintrin.h> instead."
+#endif
+
+#ifndef _MOVRSINTRIN_H_INCLUDED
+#define _MOVRSINTRIN_H_INCLUDED
+
+#ifndef __MOVRS__
+#pragma GCC push_options
+#pragma GCC target("movrs")
+#define __DISABLE_MOVRS__
+#endif /* __MOVRS__ */
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_m_prefetchrs (void* __P)
+{
+ __builtin_ia32_prefetch (__P, 2, 1, 0 /* _MM_HINT_RST2 */);
+}
+
+#ifdef __x86_64__
+
+extern __inline char
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_movrs_i8 (void const * __P)
+{
+ return (char) __builtin_ia32_movrsqi ((const char *) __P);
+}
+
+extern __inline short
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_movrs_i16 (void const * __P)
+{
+ return (short) __builtin_ia32_movrshi ((const short *) __P);
+}
+
+extern __inline int
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_movrs_i32 (void const * __P)
+{
+ return (int) __builtin_ia32_movrssi ((const int *) __P);
+}
+
+extern __inline long long
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_movrs_i64 (void const * __P)
+{
+ return (long long) __builtin_ia32_movrsdi ((const long long *) __P);
+}
+
+#endif /* __x86_64__ */
+
+#ifdef __DISABLE_MOVRS__
+#undef __DISABLE_MOVRS__
+#pragma GCC pop_options
+#endif /* __DISABLE_MOVRS__ */
+
+#ifdef __x86_64__
+
+#if !defined (__AVX10_2_256__) || !defined (__MOVRS__)
+#pragma GCC push_options
+#pragma GCC target("avx10.2,movrs")
+#define __DISABLE_MOVRS_AVX10_2__
+#endif /* __MOVRS_AVX10_2__ */
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadrs_epi8 (void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsb256_mask ((const __v32qi *) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadrs_epi8 (__m256i __D, __mmask32 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsb256_mask ((const __v32qi *) __A,
+ (__v32qi) __D,
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadrs_epi8 (__mmask32 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsb256_mask ((const __v32qi *) __A,
+ (__v32qi)
+ _mm256_setzero_si256 (),
+ (__mmask32) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadrs_epi32 (void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsd256_mask ((const __v8si *) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadrs_epi32 (__m256i __D, __mmask8 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsd256_mask ((const __v8si *) __A,
+ (__v8si) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadrs_epi32 (__mmask8 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsd256_mask ((const __v8si *) __A,
+ (__v8si)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadrs_epi64 (void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsq256_mask ((const __v4di *) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadrs_epi64 (__m256i __D, __mmask8 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsq256_mask ((const __v4di *) __A,
+ (__v4di) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadrs_epi64 (__mmask8 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsq256_mask ((const __v4di *) __A,
+ (__v4di)
+ _mm256_setzero_si256 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_loadrs_epi16 (void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsw256_mask ((const __v16hi *) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_mask_loadrs_epi16 (__m256i __D, __mmask16 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsw256_mask ((const __v16hi *) __A,
+ (__v16hi) __D,
+ (__mmask16) __U);
+}
+
+extern __inline __m256i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm256_maskz_loadrs_epi16 (__mmask16 __U, void const *__A)
+{
+ return (__m256i) __builtin_ia32_vmovrsw256_mask ((const __v16hi *) __A,
+ (__v16hi)
+ _mm256_setzero_si256 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadrs_epi8 (void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsb128_mask ((const __v16qi *) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadrs_epi8 (__m128i __D, __mmask16 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsb128_mask ((const __v16qi *) __A,
+ (__v16qi) __D,
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadrs_epi8 (__mmask16 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsb128_mask ((const __v16qi *) __A,
+ (__v16qi)
+ _mm_setzero_si128 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadrs_epi32 (void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsd128_mask ((const __v4si *) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadrs_epi32 (__m128i __D, __mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsd128_mask ((const __v4si *) __A,
+ (__v4si) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadrs_epi32 (__mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsd128_mask ((const __v4si *) __A,
+ (__v4si)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadrs_epi64 (void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsq128_mask ((const __v2di *) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadrs_epi64 (__m128i __D, __mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsq128_mask ((const __v2di *) __A,
+ (__v2di) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadrs_epi64 (__mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsq128_mask ((const __v2di *) __A,
+ (__v2di)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_loadrs_epi16 (void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsw128_mask ((const __v8hi *) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_mask_loadrs_epi16 (__m128i __D, __mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsw128_mask ((const __v8hi *) __A,
+ (__v8hi) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m128i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm_maskz_loadrs_epi16 (__mmask8 __U, void const *__A)
+{
+ return (__m128i) __builtin_ia32_vmovrsw128_mask ((const __v8hi *) __A,
+ (__v8hi)
+ _mm_setzero_si128 (),
+ (__mmask8) __U);
+}
+
+#ifdef __DISABLE_MOVRS_AVX10_2__
+#undef __DISABLE_MOVRS_AVX10_2__
+#pragma GCC pop_options
+#endif /* __DISABLE_MOVRS_AVX10_2__ */
+
+#if !defined (__AVX10_2_512__) || !defined (__MOVRS__)
+#pragma GCC push_options
+#pragma GCC target("avx10.2-512,movrs")
+#define __DISABLE_MOVRS_AVX10_2_512__
+#endif /* __MOVRS_AVX10_2_512__ */
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadrs_epi8 (void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsb512_mask ((const __v64qi *) __A,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadrs_epi8 (__m512i __D, __mmask64 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsb512_mask ((const __v64qi *) __A,
+ (__v64qi) __D,
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadrs_epi8 (__mmask64 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsb512_mask ((const __v64qi *) __A,
+ (__v64qi)
+ _mm512_setzero_si512 (),
+ (__mmask64) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadrs_epi32 (void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsd512_mask ((const __v16si *) __A,
+ (__v16si) _mm512_setzero_si512 (),
+ (__mmask16) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadrs_epi32 (__m512i __D, __mmask16 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsd512_mask ((const __v16si *) __A,
+ (__v16si) __D,
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadrs_epi32 (__mmask16 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsd512_mask ((const __v16si *) __A,
+ (__v16si)
+ _mm512_setzero_si512 (),
+ (__mmask16) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadrs_epi64 (void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsq512_mask ((const __v8di *) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadrs_epi64 (__m512i __D, __mmask8 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsq512_mask ((const __v8di *) __A,
+ (__v8di) __D,
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadrs_epi64 (__mmask8 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsq512_mask ((const __v8di *) __A,
+ (__v8di)
+ _mm512_setzero_si512 (),
+ (__mmask8) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_loadrs_epi16 (void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsw512_mask ((const __v32hi *) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) -1);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_mask_loadrs_epi16 (__m512i __D, __mmask32 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsw512_mask ((const __v32hi *) __A,
+ (__v32hi) __D,
+ (__mmask32) __U);
+}
+
+extern __inline __m512i
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_maskz_loadrs_epi16 (__mmask32 __U, void const *__A)
+{
+ return (__m512i) __builtin_ia32_vmovrsw512_mask ((const __v32hi *) __A,
+ (__v32hi)
+ _mm512_setzero_si512 (),
+ (__mmask32) __U);
+}
+
+#ifdef __DISABLE_MOVRS_AVX10_2_512__
+#undef __DISABLE_MOVRS_AVX10_2_512__
+#pragma GCC pop_options
+#endif /* __DISABLE_MOVRS_AVX10_2_512__ */
+
+#endif /* __x86_64__ */
+
+#endif /* _MOVRSINTRIN_H_INCLUDED */
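
A minimal sketch of how the intrinsics defined in the new movrsintrin.h might be exercised, assuming a compiler that carries this patch and a 64-bit target (the scalar forms are only declared under __x86_64__); the function name and buffer layout are illustrative, and the build line is a guess based on the new -mmovrs option and the avx10.2,movrs target pragma above.

/* Read-shared loads: prefetch the line with PREFETCHRST2, load eight
   dwords with the AVX10.2 VMOVRS form, then one dword with scalar MOVRS.
   Hypothetical build: gcc -O2 -mmovrs -mavx10.2 example.c  */
#include <immintrin.h>
#include <stdint.h>

int32_t
read_shared (const int32_t *buf, int32_t out[8])
{
  _m_prefetchrs ((void *) buf);               /* prefetchrst2 hint.  */
  __m256i v = _mm256_loadrs_epi32 (buf);      /* 256-bit read-shared load.  */
  _mm256_storeu_si256 ((__m256i *) out, v);
  return _movrs_i32 (buf);                    /* scalar MOVRS load.  */
}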
diff --git a/gcc/config/i386/openbsdelf.h b/gcc/config/i386/openbsdelf.h
index 3b7c27b..01de0fc 100644
--- a/gcc/config/i386/openbsdelf.h
+++ b/gcc/config/i386/openbsdelf.h
@@ -1,5 +1,5 @@
/* Configuration for an OpenBSD i386 target.
-
+
Copyright (C) 2005-2024 Free Software Foundation, Inc.
This file is part of GCC.
@@ -67,7 +67,7 @@ along with GCC; see the file COPYING3. If not see
The icky part is not here, but in <machine/profile.h>. */
#undef FUNCTION_PROFILER
#define FUNCTION_PROFILER(FILE, LABELNO) \
- fputs (flag_pic ? "\tcall __mcount@PLT\n": "\tcall __mcount\n", FILE);
+ fputs (flag_pic ? "\tcall __mcount@PLT\n" : "\tcall __mcount\n", FILE);
#undef LINK_SPEC
#define LINK_SPEC \
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index ab6a2e1..053312b 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1633,7 +1633,13 @@
})
;; Return true if this comparison only requires testing one flag bit.
+;; VCOMX/VUCOMX set ZF, SF and OF differently from COMI/UCOMI.
(define_predicate "ix86_trivial_fp_comparison_operator"
+ (if_then_else (match_test "TARGET_AVX10_2_256")
+ (match_code "gt,ge,unlt,unle,eq,uneq,ne,ltgt,ordered,unordered")
+ (match_code "gt,ge,unlt,unle,uneq,ltgt,ordered,unordered")))
+
+(define_predicate "ix86_trivial_fp_comparison_operator_xf"
(match_code "gt,ge,unlt,unle,uneq,ltgt,ordered,unordered"))
;; Return true if we know how to do this comparison. Others require
@@ -1645,6 +1651,12 @@
(match_operand 0 "comparison_operator")
(match_operand 0 "ix86_trivial_fp_comparison_operator")))
+(define_predicate "ix86_fp_comparison_operator_xf"
+ (if_then_else (match_test "ix86_fp_comparison_strategy (GET_CODE (op))
+ == IX86_FPCMP_ARITH")
+ (match_operand 0 "comparison_operator")
+ (match_operand 0 "ix86_trivial_fp_comparison_operator_xf")))
+
;; Return true if we can perform this comparison on TImode operands.
(define_predicate "ix86_timode_comparison_operator"
(if_then_else (match_test "TARGET_64BIT")
diff --git a/gcc/config/i386/sm4intrin.h b/gcc/config/i386/sm4intrin.h
index 4c212cc..e2d78f0 100644
--- a/gcc/config/i386/sm4intrin.h
+++ b/gcc/config/i386/sm4intrin.h
@@ -67,4 +67,29 @@ _mm256_sm4rnds4_epi32 (__m256i __A, __m256i __B)
#pragma GCC pop_options
#endif /* __DISABLE_SM4__ */
+#if !defined (__SM4__) || !defined (__AVX10_2_512__)
+#pragma GCC push_options
+#pragma GCC target("sm4,avx10.2-512")
+#define __DISABLE_SM4_512__
+#endif /* __SM4_512__ */
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sm4key4_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vsm4key4512 ((__v16si) __A, (__v16si) __B);
+}
+
+extern __inline __m512i
+__attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_mm512_sm4rnds4_epi32 (__m512i __A, __m512i __B)
+{
+ return (__m512i) __builtin_ia32_vsm4rnds4512 ((__v16si) __A, (__v16si) __B);
+}
+
+#ifdef __DISABLE_SM4_512__
+#undef __DISABLE_SM4_512__
+#pragma GCC pop_options
+#endif /* __DISABLE_SM4_512__ */
+
#endif /* _SM4INTRIN_H_INCLUDED */
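
A short sketch of the new 512-bit SM4 intrinsics added above; it is illustrative only (one key-expansion step feeding one four-round step, not a complete SM4 schedule), and it assumes the -msm4 -mavx10.2-512 options matching the sm4,avx10.2-512 target pragma.

#include <immintrin.h>

/* Expand four round keys, then run four SM4 rounds, across the
   sixteen dword lanes of the ZMM operands.  */
__m512i
sm4_step_512 (__m512i state, __m512i key_material)
{
  __m512i rk = _mm512_sm4key4_epi32 (state, key_material);
  return _mm512_sm4rnds4_epi32 (state, rk);
}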
diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h
index 4c315fe..c2d8277 100644
--- a/gcc/config/i386/smmintrin.h
+++ b/gcc/config/i386/smmintrin.h
@@ -385,7 +385,7 @@ _mm_extract_ps (__m128 __X, const int __N)
by index N. */
#define _MM_EXTRACT_FLOAT(D, S, N) \
{ (D) = __builtin_ia32_vec_ext_v4sf ((__v4sf)(S), (N)); }
-
+
/* Extract specified single precision float element into the lower
part of __m128. */
#define _MM_PICK_OUT_PS(X, N) \
diff --git a/gcc/config/i386/sol2.h b/gcc/config/i386/sol2.h
index b93bc4c..f51fb2e 100644
--- a/gcc/config/i386/sol2.h
+++ b/gcc/config/i386/sol2.h
@@ -80,7 +80,7 @@ along with GCC; see the file COPYING3. If not see
#define ASM_CPU_SPEC "%(asm_cpu_default) " ASM_XBRACE_COMMENT_SPEC
/* Don't include ASM_PIC_SPEC. While the Solaris 10+ assembler accepts -K PIC,
- it gives many warnings:
+ it gives many warnings:
Absolute relocation is used for symbol "<symbol>"
GNU as doesn't recognize -K at all. */
#undef ASM_SPEC
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index da91d39..15ed8ff 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -251,6 +251,9 @@
UNSPEC_UFIX_SATURATION
UNSPEC_MINMAXNEPBF16
UNSPEC_MINMAX
+
+  ;; For MOVRS support
+ UNSPEC_VMOVRS
])
(define_c_enum "unspecv" [
@@ -391,6 +394,19 @@
(V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX")
(V2DF "TARGET_SSE2")])
+(define_mode_iterator VF_BHSD
+ [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ (V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
+ (V16SF "TARGET_AVX512F && TARGET_EVEX512")
+ (V8SF "TARGET_AVX") V4SF
+ (V8DF "TARGET_AVX512F && TARGET_EVEX512")
+ (V4DF "TARGET_AVX") (V2DF "TARGET_SSE2")
+ (V32BF "TARGET_AVX10_2_512")
+ (V16BF "TARGET_AVX10_2_256")
+ (V8BF "TARGET_AVX10_2_256")
+ ])
+
;; 128-, 256- and 512-bit float vector modes for bitwise operations
(define_mode_iterator VFB
[(V32BF "TARGET_AVX512F && TARGET_EVEX512")
@@ -434,9 +450,12 @@
(define_mode_iterator VF2_AVX10_2
[(V8DF "TARGET_AVX10_2_512") V4DF V2DF])
-;; All DFmode & HFmode vector float modes
-(define_mode_iterator VF2H
- [(V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+;; All DFmode & HFmode & BFmode vector float modes
+(define_mode_iterator VF2HB
+ [(V32BF "TARGET_AVX10_2_512")
+ (V16BF "TARGET_AVX10_2_256")
+ (V8BF "TARGET_AVX10_2_256")
+ (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V8DF "TARGET_AVX512F && TARGET_EVEX512") (V4DF "TARGET_AVX") V2DF])
@@ -492,6 +511,12 @@
(V16SI "TARGET_EVEX512") (V8SI "TARGET_AVX512VL") (V4SI "TARGET_AVX512VL")
(V8DI "TARGET_EVEX512") (V4DI "TARGET_AVX512VL") (V2DI "TARGET_AVX512VL")])
+(define_mode_iterator VI1248_AVX10_2
+ [(V64QI "TARGET_AVX10_2_512") V32QI V16QI
+ (V32HI "TARGET_AVX10_2_512") V16HI V8HI
+ (V16SI "TARGET_AVX10_2_512") V8SI V4SI
+ (V8DI "TARGET_AVX10_2_512") V4DI V2DI])
+
(define_mode_iterator VF_AVX512VL
[(V16SF "TARGET_EVEX512") (V8SF "TARGET_AVX512VL") (V4SF "TARGET_AVX512VL")
(V8DF "TARGET_EVEX512") (V4DF "TARGET_AVX512VL") (V2DF "TARGET_AVX512VL")])
@@ -610,6 +635,10 @@
(define_mode_iterator VI1_AVX512VNNI
[(V64QI "TARGET_AVX512VNNI && TARGET_EVEX512") (V32QI "TARGET_AVX2") V16QI])
+(define_mode_iterator VI1_AVX512VNNIBW
+ [(V64QI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
+ (V32QI "TARGET_AVX2") V16QI])
+
(define_mode_iterator VI12_256_512_AVX512VL
[(V64QI "TARGET_EVEX512") (V32QI "TARGET_AVX512VL")
(V32HI "TARGET_EVEX512") (V16HI "TARGET_AVX512VL")])
@@ -627,6 +656,9 @@
[(V32HI "(TARGET_AVX512BW || TARGET_AVX512VNNI) && TARGET_EVEX512")
(V16HI "TARGET_AVX2") V8HI])
+(define_mode_iterator VI2_AVX10_2
+ [(V32HI "TARGET_AVX10_2_512") V16HI V8HI])
+
(define_mode_iterator VI4_AVX
[(V8SI "TARGET_AVX") V4SI])
@@ -1280,6 +1312,12 @@
(V8HF "w") (V8BF "w") (V4SF "k") (V2DF "q")
(HF "w") (BF "w") (SF "k") (DF "q")])
+;; Pointer size override for 16-bit upper-convert modes (Intel asm dialect)
+(define_mode_attr iptrh
+ [(V32HI "") (V16SI "") (V8DI "")
+ (V16HI "") (V8SI "") (V4DI "q")
+ (V8HI "") (V4SI "q") (V2DI "k")])
+
;; Mapping of vector modes to VPTERNLOG suffix
(define_mode_attr ternlogsuffix
[(V8DI "q") (V4DI "q") (V2DI "q")
@@ -2400,6 +2438,91 @@
DONE;
})
+;; Optimize cmp + movcc with mask register by kortest + movcc.
+(define_insn_and_split "*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_movqicc"
+ [(set (match_operand:QI 0 "register_operand" "=r,r,r,r,r,r")
+ (if_then_else:QI
+ (match_operator 1 "bt_comparison_operator"
+ [(match_operand:SWI1248_AVX512BWDQ_64 4 "register_operand"
+ "?k,<SWI1248_AVX512BWDQ_64:r>,?k, <SWI1248_AVX512BWDQ_64:r>,?k,r")
+ (const_int -1)])
+ (match_operand:QI 2 "register_operand" "r,r,0,0,r,r")
+ (match_operand:QI 3 "register_operand" " 0,0,r,r,r,r")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW && TARGET_CMOVE && !TARGET_PARTIAL_REG_STALL"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:SI
+ (match_dup 5)
+ (match_dup 2)
+ (match_dup 3)))]
+{
+ rtx flag_reg;
+ if (MASK_REGNO_P (REGNO (operands[4])))
+ {
+ emit_insn (gen_kortest<SWI1248_AVX512BWDQ_64:mode>_ccc (operands[4], operands[4]));
+ flag_reg = gen_rtx_REG (CCCmode, FLAGS_REG);
+ }
+ else
+ {
+ flag_reg = gen_rtx_REG (CCZmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (flag_reg,
+ gen_rtx_COMPARE (CCZmode,
+ operands[4],
+ constm1_rtx)));
+ }
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode,
+ flag_reg,const0_rtx);
+ operands[0] = gen_lowpart (SImode, operands[0]);
+ operands[2] = gen_lowpart (SImode, operands[2]);
+ operands[3] = gen_lowpart (SImode, operands[3]);
+}
+ [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd")
+ (set_attr "type" "icmov")
+ (set_attr "mode" "QI")])
+
+(define_insn_and_split "*kortest_cmp<SWI1248_AVX512BWDQ_64:mode>_mov<SWI248:mode>cc"
+ [(set (match_operand:SWI248 0 "register_operand" "=r,r,r,r,r,r,r,r")
+ (if_then_else:SWI248
+ (match_operator 1 "bt_comparison_operator"
+ [(match_operand:SWI1248_AVX512BWDQ_64 4 "register_operand"
+ "?k,<SWI1248_AVX512BWDQ_64:r>,?k, <SWI1248_AVX512BWDQ_64:r>,?k,r,?k, r")
+ (const_int -1)])
+ (match_operand:SWI248 2 "nonimmediate_operand" "rm,rm, 0, 0,rm,rm, r, r")
+ (match_operand:SWI248 3 "nonimmediate_operand" " 0, 0,rm,rm, r, r,rm,rm")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_AVX512BW && TARGET_CMOVE
+ && !(MEM_P (operands[2]) && MEM_P (operands[3]))"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 0)
+ (if_then_else:SWI248
+ (match_dup 5)
+ (match_dup 2)
+ (match_dup 3)))]
+{
+ rtx flag_reg;
+ if (MASK_REGNO_P (REGNO (operands[4])))
+ {
+ emit_insn (gen_kortest<SWI1248_AVX512BWDQ_64:mode>_ccc (operands[4], operands[4]));
+ flag_reg = gen_rtx_REG (CCCmode, FLAGS_REG);
+ }
+ else
+ {
+ flag_reg = gen_rtx_REG (CCZmode, FLAGS_REG);
+ emit_insn (gen_rtx_SET (flag_reg,
+ gen_rtx_COMPARE (CCZmode,
+ operands[4],
+ constm1_rtx)));
+ }
+ operands[5] = gen_rtx_fmt_ee (GET_CODE (operands[1]), VOIDmode,
+ flag_reg,const0_rtx);
+}
+ [(set_attr "isa" "*,*,*,*,apx_ndd,apx_ndd,apx_ndd,apx_ndd")
+ (set_attr "type" "icmov")
+ (set_attr "mode" "<SWI248:MODE>")])
+
(define_insn "kunpckhi"
[(set (match_operand:HI 0 "register_operand" "=k")
(ior:HI
@@ -2520,10 +2643,10 @@
})
(define_expand "<insn><mode>3<mask_name><round_name>"
- [(set (match_operand:VFH 0 "register_operand")
- (plusminus:VFH
- (match_operand:VFH 1 "<round_nimm_predicate>")
- (match_operand:VFH 2 "<round_nimm_predicate>")))]
+ [(set (match_operand:VF_BHSD 0 "register_operand")
+ (plusminus:VF_BHSD
+ (match_operand:VF_BHSD 1 "<round_nimm_predicate>")
+ (match_operand:VF_BHSD 2 "<round_nimm_predicate>")))]
"TARGET_SSE && <mask_mode512bit_condition> && <round_mode_condition>"
"ix86_fixup_binary_operands_no_copy (<CODE>, <MODE>mode, operands);")
@@ -2609,10 +2732,10 @@
})
(define_expand "mul<mode>3<mask_name><round_name>"
- [(set (match_operand:VFH 0 "register_operand")
- (mult:VFH
- (match_operand:VFH 1 "<round_nimm_predicate>")
- (match_operand:VFH 2 "<round_nimm_predicate>")))]
+ [(set (match_operand:VF_BHSD 0 "register_operand")
+ (mult:VF_BHSD
+ (match_operand:VF_BHSD 1 "<round_nimm_predicate>")
+ (match_operand:VF_BHSD 2 "<round_nimm_predicate>")))]
"TARGET_SSE && <mask_mode512bit_condition> && <round_mode_condition>"
"ix86_fixup_binary_operands_no_copy (MULT, <MODE>mode, operands);")
@@ -2727,6 +2850,26 @@
}
})
+(define_expand "div<mode>3"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand")
+ (div:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "register_operand")
+ (match_operand:VBF_AVX10_2 2 "vector_operand")))]
+ "TARGET_AVX10_2_256"
+{
+ if (TARGET_RECIP_VEC_DIV
+ && optimize_insn_for_speed_p ()
+ && flag_finite_math_only
+ && flag_unsafe_math_optimizations)
+ {
+ rtx op = gen_reg_rtx (<MODE>mode);
+ operands[2] = force_reg (<MODE>mode, operands[2]);
+ emit_insn (gen_avx10_2_rcppbf16_<mode> (op, operands[2]));
+ emit_insn (gen_avx10_2_mulnepbf16_<mode> (operands[0], operands[1], op));
+ DONE;
+ }
+})
+
(define_expand "cond_div<mode>"
[(set (match_operand:VFH 0 "register_operand")
(vec_merge:VFH
@@ -2893,8 +3036,8 @@
(set_attr "mode" "<MODE>")])
(define_expand "sqrt<mode>2"
- [(set (match_operand:VF2H 0 "register_operand")
- (sqrt:VF2H (match_operand:VF2H 1 "vector_operand")))]
+ [(set (match_operand:VF2HB 0 "register_operand")
+ (sqrt:VF2HB (match_operand:VF2HB 1 "vector_operand")))]
"TARGET_SSE2")
(define_expand "sqrt<mode>2"
@@ -3226,7 +3369,7 @@
u = UNSPEC_IEEE_MAX;
if (MEM_P (operands[2]))
- force_reg (<MODE>mode, operands[2]);
+ operands[2] = force_reg (<MODE>mode, operands[2]);
rtvec v = gen_rtvec (2, operands[2], operands[1]);
rtx tmp = gen_rtx_UNSPEC (<MODE>mode, v, u);
emit_move_insn (operands[0], tmp);
@@ -3290,7 +3433,27 @@
(const_string "*")))
(set_attr "mode" "<ssescalarmode>")])
-(define_insn "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
+(define_expand "<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
+ [(set (match_operand:VFH_128 0 "register_operand")
+ (vec_merge:VFH_128
+ (smaxmin:VFH_128
+ (match_operand:VFH_128 1 "register_operand")
+ (match_operand:VFH_128 2 "nonimmediate_operand"))
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE"
+{
+ if (!flag_finite_math_only || flag_signed_zeros)
+ {
+ emit_insn (gen_<sse>_ieee_vm<maxmin_float><mode>3<mask_scalar_name><round_saeonly_scalar_name>
+ (operands[0], operands[1], operands[2]
+ <mask_scalar_operand_arg34>
+ <round_saeonly_scalar_mask_arg3>));
+ DONE;
+ }
+})
+
+(define_insn "*<sse>_vm<code><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
[(set (match_operand:VFH_128 0 "register_operand" "=x,v")
(vec_merge:VFH_128
(smaxmin:VFH_128
@@ -3308,6 +3471,25 @@
(set_attr "prefix" "<round_saeonly_scalar_prefix>")
(set_attr "mode" "<ssescalarmode>")])
+(define_insn "<sse>_ieee_vm<ieee_maxmin><mode>3<mask_scalar_name><round_saeonly_scalar_name>"
+ [(set (match_operand:VFH_128 0 "register_operand" "=x,v")
+ (vec_merge:VFH_128
+ (unspec:VFH_128
+ [(match_operand:VFH_128 1 "register_operand" "0,v")
+ (match_operand:VFH_128 2 "nonimmediate_operand" "xm,<round_saeonly_scalar_constraint>")]
+ IEEE_MAXMIN)
+ (match_dup 1)
+ (const_int 1)))]
+ "TARGET_SSE"
+ "@
+ <ieee_maxmin><ssescalarmodesuffix>\t{%2, %0|%0, %<iptr>2}
+ v<ieee_maxmin><ssescalarmodesuffix>\t{<round_saeonly_scalar_mask_op3>%2, %1, %0<mask_scalar_operand3>|%0<mask_scalar_operand3>, %1, %<iptr>2<round_saeonly_scalar_mask_op3>}"
+ [(set_attr "isa" "noavx,avx")
+ (set_attr "type" "sse")
+ (set_attr "btver2_sse_attr" "maxmin")
+ (set_attr "prefix" "<round_saeonly_scalar_prefix>")
+ (set_attr "mode" "<ssescalarmode>")])
+
(define_mode_attr addsub_cst [(V4DF "5") (V2DF "1")
(V4SF "5") (V8SF "85")])
@@ -4216,32 +4398,19 @@
;; Since vpcmpd implicitly clear the upper bits of dest, transform
;; vpcmpd + zero_extend to vpcmpd since the instruction
-(define_insn_and_split "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
- [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_cmp<V48H_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
(zero_extend:SWI248x
(unspec:<V48H_AVX512VL:avx512fmaskmode>
- [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand")
- (match_operand:V48H_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ [(match_operand:V48H_AVX512VL 1 "nonimmediate_operand" "v")
+ (match_operand:V48H_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
UNSPEC_PCMP)))]
"TARGET_AVX512F
&& (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
- && ix86_pre_reload_split ()
&& (GET_MODE_NUNITS (<V48H_AVX512VL:MODE>mode)
< GET_MODE_PRECISION (<SWI248x:MODE>mode))"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (unspec:<V48H_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_PCMP))]
-{
- operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
- operands[0], <SWI248x:MODE>mode);
-}
+ "v<ssecmpintprefix>cmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
@@ -4269,21 +4438,22 @@
"#"
"&& 1"
[(set (match_dup 0)
- (unspec:<V48H_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_PCMP))
- (set (match_dup 4) (match_dup 0))]
+ (zero_extend:SWI248x
+ (unspec:<V48H_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP)))
+ (set (match_dup 4) (match_dup 5))]
{
- operands[1] = force_reg (<V48H_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
+ operands[5] = lowpart_subreg (<V48H_AVX512VL:avx512fmaskmode>mode,
operands[0], <SWI248x:MODE>mode);
-}
- [(set_attr "type" "ssecmp")
- (set_attr "length_immediate" "1")
- (set_attr "prefix" "evex")
- (set_attr "mode" "<V48H_AVX512VL:sseinsnmode>")])
+ if (SUBREG_P (operands[5]))
+ {
+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+ SUBREG_PROMOTED_SET (operands[5], 1);
+ }
+})
(define_insn_and_split "*<avx512>_cmp<mode>3"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
@@ -4318,31 +4488,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn_and_split "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
- [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_cmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
(zero_extend:SWI248x
(unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
UNSPEC_PCMP)))]
"TARGET_AVX512BW
- && ix86_pre_reload_split ()
- && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
- < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_PCMP))]
-{
- operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
- operands[0], <SWI248x:MODE>mode);
-}
+ && (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
+ < GET_MODE_PRECISION (<SWI248x:MODE>mode))"
+ "vpcmp<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
@@ -4369,16 +4526,21 @@
"#"
"&& 1"
[(set (match_dup 0)
- (unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_PCMP))
- (set (match_dup 4) (match_dup 0))]
+ (zero_extend:SWI248x
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_PCMP)))
+ (set (match_dup 4) (match_dup 5))]
{
- operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+ operands[5] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
operands[0], <SWI248x:MODE>mode);
+ if (SUBREG_P (operands[5]))
+ {
+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+ SUBREG_PROMOTED_SET (operands[5], 1);
+ }
}
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
@@ -4436,31 +4598,18 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn_and_split "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
- [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_ucmp<VI12_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
(zero_extend:SWI248x
(unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand")
- (match_operand:VI12_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ [(match_operand:VI12_AVX512VL 1 "nonimmediate_operand" "v")
+ (match_operand:VI12_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
UNSPEC_UNSIGNED_PCMP)))]
"TARGET_AVX512BW
- && ix86_pre_reload_split ()
&& (GET_MODE_NUNITS (<VI12_AVX512VL:MODE>mode)
< GET_MODE_PRECISION (<SWI248x:MODE>mode))"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_UNSIGNED_PCMP))]
-{
- operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
- operands[0], <SWI248x:MODE>mode);
-}
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
@@ -4488,16 +4637,21 @@
"#"
"&& 1"
[(set (match_dup 0)
- (unspec:<VI12_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_UNSIGNED_PCMP))
- (set (match_dup 4) (match_dup 0))]
-{
- operands[1] = force_reg (<VI12_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
+ (zero_extend:SWI248x
+ (unspec:<VI12_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP)))
+ (set (match_dup 4) (match_dup 5))]
+{
+ operands[5] = lowpart_subreg (<VI12_AVX512VL:avx512fmaskmode>mode,
operands[0], <SWI248x:MODE>mode);
+ if (SUBREG_P (operands[5]))
+ {
+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+ SUBREG_PROMOTED_SET (operands[5], 1);
+ }
}
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
@@ -4533,32 +4687,19 @@
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
-(define_insn_and_split "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
- [(set (match_operand:SWI248x 0 "register_operand")
+(define_insn "*<avx512>_ucmp<VI48_AVX512VL:mode>3_zero_extend<SWI248x:mode>"
+ [(set (match_operand:SWI248x 0 "register_operand" "=k")
(zero_extend:SWI248x
(unspec:<VI48_AVX512VL:avx512fmaskmode>
- [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand")
- (match_operand:VI48_AVX512VL 2 "nonimmediate_operand")
- (match_operand:SI 3 "const_0_to_7_operand")]
+ [(match_operand:VI48_AVX512VL 1 "nonimmediate_operand" "v")
+ (match_operand:VI48_AVX512VL 2 "nonimmediate_operand" "vm")
+ (match_operand:SI 3 "const_0_to_7_operand" "n")]
UNSPEC_UNSIGNED_PCMP)))]
"TARGET_AVX512F
&& (!VALID_MASK_AVX512BW_MODE (<SWI248x:MODE>mode) || TARGET_AVX512BW)
- && ix86_pre_reload_split ()
&& (GET_MODE_NUNITS (<VI48_AVX512VL:MODE>mode)
< GET_MODE_PRECISION (<SWI248x:MODE>mode))"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (unspec:<VI48_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_UNSIGNED_PCMP))]
-{
- operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
- operands[0], <SWI248x:MODE>mode);
-}
+ "vpcmpu<ssemodesuffix>\t{%3, %2, %1, %0|%0, %1, %2, %3}"
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
@@ -4586,16 +4727,21 @@
"#"
"&& 1"
[(set (match_dup 0)
- (unspec:<VI48_AVX512VL:avx512fmaskmode>
- [(match_dup 1)
- (match_dup 2)
- (match_dup 3)]
- UNSPEC_UNSIGNED_PCMP))
- (set (match_dup 4) (match_dup 0))]
-{
- operands[1] = force_reg (<VI48_AVX512VL:MODE>mode, operands[1]);
- operands[0] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
+ (zero_extend:SWI248x
+ (unspec:<VI48_AVX512VL:avx512fmaskmode>
+ [(match_dup 1)
+ (match_dup 2)
+ (match_dup 3)]
+ UNSPEC_UNSIGNED_PCMP)))
+ (set (match_dup 4) (match_dup 5))]
+{
+ operands[5] = lowpart_subreg (<VI48_AVX512VL:avx512fmaskmode>mode,
operands[0], <SWI248x:MODE>mode);
+ if (SUBREG_P (operands[5]))
+ {
+ SUBREG_PROMOTED_VAR_P (operands[5]) = 1;
+ SUBREG_PROMOTED_SET (operands[5], 1);
+ }
}
[(set_attr "type" "ssecmp")
(set_attr "length_immediate" "1")
@@ -4754,6 +4900,19 @@
DONE;
})
+(define_expand "vec_cmp<mode><avx512fmaskmodelower>"
+ [(set (match_operand:<avx512fmaskmode> 0 "register_operand")
+ (match_operator:<avx512fmaskmode> 1 ""
+ [(match_operand:VBF_AVX10_2 2 "register_operand")
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand")]))]
+ "TARGET_AVX10_2_256"
+{
+ bool ok = ix86_expand_mask_vec_cmp (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3]);
+ gcc_assert (ok);
+ DONE;
+})
+
(define_expand "vec_cmp<mode><sseintvecmodelower>"
[(set (match_operand:<sseintvecmode> 0 "register_operand")
(match_operator:<sseintvecmode> 1 ""
@@ -5637,7 +5796,10 @@
(HF "TARGET_AVX512FP16")
(V8HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
(V16HF "TARGET_AVX512FP16 && TARGET_AVX512VL")
- (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")])
+ (V32HF "TARGET_AVX512FP16 && TARGET_EVEX512")
+ (V8BF "TARGET_AVX10_2_256")
+ (V16BF "TARGET_AVX10_2_256")
+ (V32BF "TARGET_AVX10_2_512")])
(define_expand "fma<mode>4"
[(set (match_operand:FMAMODEM 0 "register_operand")
@@ -5797,7 +5959,7 @@
[(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VFH_AVX512VL
(fma:VFH_AVX512VL
- (match_operand:VFH_AVX512VL 1 "register_operand" "0,0")
+ (match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "0,0")
(match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
(match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
(match_dup 1)
@@ -5816,7 +5978,7 @@
(fma:VFH_AVX512VL
(match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v")
(match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
- (match_operand:VFH_AVX512VL 3 "register_operand" "0"))
+ (match_operand:VFH_AVX512VL 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512F && <round_mode_condition>"
@@ -5901,7 +6063,7 @@
[(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VFH_AVX512VL
(fma:VFH_AVX512VL
- (match_operand:VFH_AVX512VL 1 "register_operand" "0,0")
+ (match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "0,0")
(match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
(neg:VFH_AVX512VL
(match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
@@ -5922,7 +6084,7 @@
(match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v")
(match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
(neg:VFH_AVX512VL
- (match_operand:VFH_AVX512VL 3 "register_operand" "0")))
+ (match_operand:VFH_AVX512VL 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512F && <round_mode_condition>"
@@ -6008,7 +6170,7 @@
(vec_merge:VFH_AVX512VL
(fma:VFH_AVX512VL
(neg:VFH_AVX512VL
- (match_operand:VFH_AVX512VL 1 "register_operand" "0,0"))
+ (match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "0,0"))
(match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
(match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))
(match_dup 1)
@@ -6028,7 +6190,7 @@
(neg:VFH_AVX512VL
(match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v"))
(match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
- (match_operand:VFH_AVX512VL 3 "register_operand" "0"))
+ (match_operand:VFH_AVX512VL 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512F && <round_mode_condition>"
@@ -6117,7 +6279,7 @@
(vec_merge:VFH_AVX512VL
(fma:VFH_AVX512VL
(neg:VFH_AVX512VL
- (match_operand:VFH_AVX512VL 1 "register_operand" "0,0"))
+ (match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "0,0"))
(match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
(neg:VFH_AVX512VL
(match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>")))
@@ -6139,7 +6301,7 @@
(match_operand:VFH_AVX512VL 1 "<round_nimm_predicate>" "%v"))
(match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
(neg:VFH_AVX512VL
- (match_operand:VFH_AVX512VL 3 "register_operand" "0")))
+ (match_operand:VFH_AVX512VL 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX512F && <round_mode_condition>"
@@ -6271,9 +6433,9 @@
[(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
(vec_merge:VFH_AVX512VL
(unspec:VFH_AVX512VL
- [(match_operand:VFH_AVX512VL 1 "register_operand" "v")
+ [(match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "v")
(match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
- (match_operand:VFH_AVX512VL 3 "register_operand" "0")]
+ (match_operand:VFH_AVX512VL 3 "nonimmediate_operand" "0")]
UNSPEC_FMADDSUB)
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -6323,7 +6485,7 @@
[(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v,v")
(vec_merge:VFH_AVX512VL
(unspec:VFH_AVX512VL
- [(match_operand:VFH_AVX512VL 1 "register_operand" "0,0")
+ [(match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "0,0")
(match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>,v")
(neg:VFH_AVX512VL
(match_operand:VFH_AVX512VL 3 "<round_nimm_predicate>" "v,<round_constraint>"))]
@@ -6342,10 +6504,10 @@
[(set (match_operand:VFH_AVX512VL 0 "register_operand" "=v")
(vec_merge:VFH_AVX512VL
(unspec:VFH_AVX512VL
- [(match_operand:VFH_AVX512VL 1 "register_operand" "v")
+ [(match_operand:VFH_AVX512VL 1 "nonimmediate_operand" "v")
(match_operand:VFH_AVX512VL 2 "<round_nimm_predicate>" "<round_constraint>")
(neg:VFH_AVX512VL
- (match_operand:VFH_AVX512VL 3 "register_operand" "0"))]
+ (match_operand:VFH_AVX512VL 3 "nonimmediate_operand" "0"))]
UNSPEC_FMADDSUB)
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
@@ -6362,7 +6524,7 @@
[(set (match_operand:VFH_128 0 "register_operand")
(vec_merge:VFH_128
(fma:VFH_128
- (match_operand:VFH_128 1 "register_operand")
+ (match_operand:VFH_128 1 "nonimmediate_operand")
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>")
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>"))
(match_dup 1)
@@ -6373,7 +6535,7 @@
[(set (match_operand:VFH_128 0 "register_operand")
(vec_merge:VFH_128
(fma:VFH_128
- (match_operand:VFH_128 1 "register_operand")
+ (match_operand:VFH_128 1 "nonimmediate_operand")
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>")
(neg:VFH_128
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>")))
@@ -6386,8 +6548,8 @@
(vec_merge:VFH_128
(fma:VFH_128
(neg:VFH_128
- (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>"))
- (match_operand:VFH_128 1 "register_operand")
+ (match_operand:VFH_128 1 "nonimmediate_operand"))
+ (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>")
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>"))
(match_dup 1)
(const_int 1)))]
@@ -6398,8 +6560,8 @@
(vec_merge:VFH_128
(fma:VFH_128
(neg:VFH_128
- (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>"))
- (match_operand:VFH_128 1 "register_operand")
+ (match_operand:VFH_128 1 "nonimmediate_operand"))
+ (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>")
(neg:VFH_128
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>")))
(match_dup 1)
@@ -6410,7 +6572,7 @@
[(set (match_operand:VFH_128 0 "register_operand" "=v,v")
(vec_merge:VFH_128
(fma:VFH_128
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0")
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>, v")
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
(match_dup 1)
@@ -6427,7 +6589,7 @@
[(set (match_operand:VFH_128 0 "register_operand" "=v,v")
(vec_merge:VFH_128
(fma:VFH_128
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0")
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
(neg:VFH_128
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
@@ -6446,8 +6608,8 @@
(vec_merge:VFH_128
(fma:VFH_128
(neg:VFH_128
- (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
@@ -6464,8 +6626,8 @@
(vec_merge:VFH_128
(fma:VFH_128
(neg:VFH_128
- (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
(neg:VFH_128
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
(match_dup 1)
@@ -6483,7 +6645,7 @@
(vec_merge:VFH_128
(vec_merge:VFH_128
(fma:VFH_128
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0")
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
(match_dup 1)
@@ -6505,7 +6667,7 @@
(fma:VFH_128
(match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
- (match_operand:VFH_128 3 "register_operand" "0"))
+ (match_operand:VFH_128 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:QI 4 "register_operand" "Yk"))
(match_dup 3)
@@ -6535,7 +6697,7 @@
(vec_merge:VFH_128
(vec_merge:VFH_128
(fma:VFH_128
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0")
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
(match_operand:VFH_128 4 "const0_operand")
@@ -6555,7 +6717,7 @@
(vec_merge:VFH_128
(vec_merge:VFH_128
(fma:VFH_128
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0")
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
(neg:VFH_128
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
@@ -6579,7 +6741,7 @@
(match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>")
(neg:VFH_128
- (match_operand:VFH_128 3 "register_operand" "0")))
+ (match_operand:VFH_128 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:QI 4 "register_operand" "Yk"))
(match_dup 3)
@@ -6595,7 +6757,7 @@
(vec_merge:VFH_128
(vec_merge:VFH_128
(fma:VFH_128
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0")
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
(neg:VFH_128
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
@@ -6617,8 +6779,8 @@
(vec_merge:VFH_128
(fma:VFH_128
(neg:VFH_128
- (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
(match_dup 1)
(match_operand:QI 4 "register_operand" "Yk,Yk"))
@@ -6640,7 +6802,7 @@
(neg:VFH_128
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
(match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
- (match_operand:VFH_128 3 "register_operand" "0"))
+ (match_operand:VFH_128 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:QI 4 "register_operand" "Yk"))
(match_dup 3)
@@ -6672,7 +6834,7 @@
(fma:VFH_128
(neg:VFH_128
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0")
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>"))
(match_operand:VFH_128 4 "const0_operand")
(match_operand:QI 5 "register_operand" "Yk,Yk"))
@@ -6692,8 +6854,8 @@
(vec_merge:VFH_128
(fma:VFH_128
(neg:VFH_128
- (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0"))
+ (match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v")
(neg:VFH_128
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
(match_dup 1)
@@ -6717,7 +6879,7 @@
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>"))
(match_operand:VFH_128 1 "<round_nimm_scalar_predicate>" "%v")
(neg:VFH_128
- (match_operand:VFH_128 3 "register_operand" "0")))
+ (match_operand:VFH_128 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:QI 4 "register_operand" "Yk"))
(match_dup 3)
@@ -6735,7 +6897,7 @@
(fma:VFH_128
(neg:VFH_128
(match_operand:VFH_128 2 "<round_nimm_scalar_predicate>" "<round_constraint>,v"))
- (match_operand:VFH_128 1 "register_operand" "0,0")
+ (match_operand:VFH_128 1 "nonimmediate_operand" "0,0")
(neg:VFH_128
(match_operand:VFH_128 3 "<round_nimm_scalar_predicate>" "v,<round_constraint>")))
(match_operand:VFH_128 4 "const0_operand")
@@ -7450,7 +7612,7 @@
[(match_operand:<ssePHmode> 1 "<round_nimm_predicate>" "<round_constraint>")]
UNSPEC_US_FIX_NOTRUNC))]
"TARGET_AVX512FP16 && <round_mode_condition>"
- "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %1<round_mask_op2>}"
+ "vcvtph2<sseintconvertsignprefix><sseintconvert>\t{<round_mask_op2>%1, %0<mask_operand2>|%0<mask_operand2>, %<iptrh>1<round_mask_op2>}"
[(set_attr "type" "ssecvt")
(set_attr "prefix" "evex")
(set_attr "mode" "<sseinsnmode>")])
@@ -8817,7 +8979,7 @@
cvtsi2sd{l}\t{%2, %0|%0, %2}
vcvtsi2sd{l}\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,noavx,avx")
- (set_attr "type" "sseicvt")
+ (set_attr "type" "sseicvt2")
(set_attr "athlon_decode" "double,direct,*")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "bdver1_decode" "double,direct,*")
@@ -8839,7 +9001,7 @@
cvtsi2sd{q}\t{%2, %0|%0, %2}
vcvtsi2sd{q}\t{%2, <round_op3>%1, %0|%0, %1<round_op3>, %2}"
[(set_attr "isa" "noavx,noavx,avx")
- (set_attr "type" "sseicvt")
+ (set_attr "type" "sseicvt2")
(set_attr "athlon_decode" "double,direct,*")
(set_attr "amdfam10_decode" "vector,double,*")
(set_attr "bdver1_decode" "double,direct,*")
@@ -10897,7 +11059,7 @@
vmovlps\t{%H2, %1, %0|%0, %1, %H2}
%vmovhps\t{%2, %0|%q0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
- (set_attr "type" "ssemov")
+ (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF,V2SF")])
@@ -11459,7 +11621,7 @@
vmovlhps\t{%2, %1, %0|%0, %1, %2}
%vmovlps\t{%2, %H0|%H0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
- (set_attr "type" "ssemov")
+ (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex,maybe_vex")
(set_attr "mode" "V2SF,V2SF,V4SF,V4SF,V2SF")])
@@ -11512,7 +11674,7 @@
vmovlps\t{%2, %1, %0|%0, %1, %q2}
%vmovlps\t{%2, %0|%q0, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx,*")
- (set_attr "type" "sseshuf,sseshuf,ssemov,ssemov,ssemov")
+ (set_attr "type" "sseshuf,sseshuf,ssemov2,ssemov2,ssemov")
(set (attr "length_immediate")
(if_then_else (eq_attr "alternative" "0,1")
(const_string "1")
@@ -11668,7 +11830,7 @@
movhps\t{%2, %0|%0, %q2}
vmovhps\t{%2, %1, %0|%0, %1, %q2}"
[(set_attr "isa" "noavx,avx,noavx,avx")
- (set_attr "type" "ssemov")
+ (set_attr "type" "ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,maybe_evex")
(set_attr "mode" "V4SF,V4SF,V2SF,V2SF")])
@@ -11749,6 +11911,8 @@
(const_string "imov")
(eq_attr "alternative" "14")
(const_string "fmov")
+ (eq_attr "alternative" "4,6")
+ (const_string "ssemov2")
]
(const_string "ssemov")))
(set (attr "addr")
@@ -12114,7 +12278,7 @@
movlpd\t{%2, %0|%0, %2}
vmovlpd\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "isa" "noavx,avx,noavx,avx")
- (set_attr "type" "ssemov")
+ (set_attr "type" "ssemov2")
(set_attr "mode" "DF")])
(define_expand "vec_set<mode>"
@@ -14565,7 +14729,7 @@
#
#"
[(set_attr "isa" "noavx,avx,noavx,avx,*,*,*")
- (set_attr "type" "ssemov,ssemov,sselog,sselog,ssemov,fmov,imov")
+ (set_attr "type" "ssemov2,ssemov2,sselog,sselog,ssemov,fmov,imov")
(set (attr "prefix_data16")
(if_then_else (eq_attr "alternative" "0")
(const_string "1")
@@ -14635,6 +14799,8 @@
(const_string "fmov")
(eq_attr "alternative" "10")
(const_string "imov")
+ (eq_attr "alternative" "0,1,2")
+ (const_string "ssemov2")
]
(const_string "ssemov")))
(set (attr "prefix_data16")
@@ -14687,7 +14853,7 @@
(if_then_else
(eq_attr "alternative" "5")
(const_string "sselog")
- (const_string "ssemov")))
+ (const_string "ssemov2")))
(set (attr "prefix_data16")
(if_then_else
(and (eq_attr "alternative" "2,4")
@@ -14759,7 +14925,7 @@
(if_then_else
(eq_attr "alternative" "0,1,2")
(const_string "sselog")
- (const_string "ssemov")))
+ (const_string "ssemov2")))
(set (attr "prefix_data16")
(if_then_else (eq_attr "alternative" "3")
(const_string "1")
@@ -15288,7 +15454,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn "*avx512vl_<code>v2div2qi2_mask_store_1"
+(define_insn "avx512vl_<code>v2div2qi2_mask_store_1"
[(set (match_operand:V2QI 0 "memory_operand" "=m")
(vec_merge:V2QI
(any_truncate:V2QI
@@ -15302,28 +15468,19 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn_and_split "avx512vl_<code>v2div2qi2_mask_store_2"
- [(set (match_operand:HI 0 "memory_operand")
- (subreg:HI
- (vec_merge:V2QI
- (any_truncate:V2QI
- (match_operand:V2DI 1 "register_operand"))
- (vec_select:V2QI
- (subreg:V4QI
- (vec_concat:V2HI
- (match_dup 0)
- (const_int 0)) 0)
- (parallel [(const_int 0) (const_int 1)]))
- (match_operand:QI 2 "register_operand")) 0))]
- "TARGET_AVX512VL && ix86_pre_reload_split ()"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (vec_merge:V2QI
- (any_truncate:V2QI (match_dup 1))
- (match_dup 0)
- (match_dup 2)))]
- "operands[0] = adjust_address_nv (operands[0], V2QImode, 0);")
+(define_expand "avx512vl_<code>v2div2qi2_mask_store_2"
+ [(match_operand:HI 0 "memory_operand")
+ (any_truncate:V2QI
+ (match_operand:V2DI 1 "register_operand"))
+ (match_operand:QI 2 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ operands[0] = adjust_address_nv (operands[0], V2QImode, 0);
+ emit_insn (gen_avx512vl_<code>v2div2qi2_mask_store_1 (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+})
(define_insn "*avx512vl_<code><mode>v4qi2_store_1"
[(set (match_operand:V4QI 0 "memory_operand" "=m")
@@ -15392,7 +15549,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn "*avx512vl_<code><mode>v4qi2_mask_store_1"
+(define_insn "avx512vl_<code><mode>v4qi2_mask_store_1"
[(set (match_operand:V4QI 0 "memory_operand" "=m")
(vec_merge:V4QI
(any_truncate:V4QI
@@ -15406,29 +15563,19 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn_and_split "avx512vl_<code><mode>v4qi2_mask_store_2"
- [(set (match_operand:SI 0 "memory_operand")
- (subreg:SI
- (vec_merge:V4QI
- (any_truncate:V4QI
- (match_operand:VI4_128_8_256 1 "register_operand"))
- (vec_select:V4QI
- (subreg:V8QI
- (vec_concat:V2SI
- (match_dup 0)
- (const_int 0)) 0)
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)]))
- (match_operand:QI 2 "register_operand")) 0))]
- "TARGET_AVX512VL && ix86_pre_reload_split ()"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (vec_merge:V4QI
- (any_truncate:V4QI (match_dup 1))
- (match_dup 0)
- (match_dup 2)))]
- "operands[0] = adjust_address_nv (operands[0], V4QImode, 0);")
+(define_expand "avx512vl_<code><mode>v4qi2_mask_store_2"
+ [(match_operand:SI 0 "memory_operand")
+ (any_truncate:V4QI
+ (match_operand:VI4_128_8_256 1 "register_operand"))
+ (match_operand:QI 2 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ operands[0] = adjust_address_nv (operands[0], V4QImode, 0);
+ emit_insn (gen_avx512vl_<code><mode>v4qi2_mask_store_1 (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+})
(define_mode_iterator VI2_128_BW_4_256
[(V8HI "TARGET_AVX512BW") V8SI])
@@ -15500,7 +15647,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn "*avx512vl_<code><mode>v8qi2_mask_store_1"
+(define_insn "avx512vl_<code><mode>v8qi2_mask_store_1"
[(set (match_operand:V8QI 0 "memory_operand" "=m")
(vec_merge:V8QI
(any_truncate:V8QI
@@ -15514,31 +15661,19 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn_and_split "avx512vl_<code><mode>v8qi2_mask_store_2"
- [(set (match_operand:DI 0 "memory_operand")
- (subreg:DI
- (vec_merge:V8QI
- (any_truncate:V8QI
- (match_operand:VI2_128_BW_4_256 1 "register_operand"))
- (vec_select:V8QI
- (subreg:V16QI
- (vec_concat:V2DI
- (match_dup 0)
- (const_int 0)) 0)
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)
- (const_int 4) (const_int 5)
- (const_int 6) (const_int 7)]))
- (match_operand:QI 2 "register_operand")) 0))]
- "TARGET_AVX512VL && ix86_pre_reload_split ()"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (vec_merge:V8QI
- (any_truncate:V8QI (match_dup 1))
- (match_dup 0)
- (match_dup 2)))]
- "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
+(define_expand "avx512vl_<code><mode>v8qi2_mask_store_2"
+ [(match_operand:DI 0 "memory_operand")
+ (any_truncate:V8QI
+ (match_operand:VI2_128_BW_4_256 1 "register_operand"))
+ (match_operand:QI 2 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ operands[0] = adjust_address_nv (operands[0], V8QImode, 0);
+ emit_insn (gen_avx512vl_<code><mode>v8qi2_mask_store_1 (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+})
(define_mode_iterator PMOV_SRC_MODE_4 [(V4DI "TARGET_AVX2") V2DI V4SI])
(define_mode_attr pmov_dst_4
@@ -15666,7 +15801,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn "*avx512vl_<code><mode>v4hi2_mask_store_1"
+(define_insn "avx512vl_<code><mode>v4hi2_mask_store_1"
[(set (match_operand:V4HI 0 "memory_operand" "=m")
(vec_merge:V4HI
(any_truncate:V4HI
@@ -15684,30 +15819,19 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn_and_split "avx512vl_<code><mode>v4hi2_mask_store_2"
- [(set (match_operand:DI 0 "memory_operand")
- (subreg:DI
- (vec_merge:V4HI
- (any_truncate:V4HI
- (match_operand:VI4_128_8_256 1 "register_operand"))
- (vec_select:V4HI
- (subreg:V8HI
- (vec_concat:V2DI
- (match_dup 0)
- (const_int 0)) 0)
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)]))
- (match_operand:QI 2 "register_operand")) 0))]
- "TARGET_AVX512VL && ix86_pre_reload_split ()"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (vec_merge:V4HI
- (any_truncate:V4HI (match_dup 1))
- (match_dup 0)
- (match_dup 2)))]
- "operands[0] = adjust_address_nv (operands[0], V4HImode, 0);")
-
+(define_expand "avx512vl_<code><mode>v4hi2_mask_store_2"
+ [(match_operand:DI 0 "memory_operand")
+ (any_truncate:V4HI
+ (match_operand:VI4_128_8_256 1 "register_operand"))
+ (match_operand:QI 2 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ operands[0] = adjust_address_nv (operands[0], V4HImode, 0);
+ emit_insn (gen_avx512vl_<code><mode>v4hi2_mask_store_1 (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+})
(define_insn "*avx512vl_<code>v2div2hi2_store_1"
[(set (match_operand:V2HI 0 "memory_operand" "=m")
@@ -15768,7 +15892,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn "*avx512vl_<code>v2div2hi2_mask_store_1"
+(define_insn "avx512vl_<code>v2div2hi2_mask_store_1"
[(set (match_operand:V2HI 0 "memory_operand" "=m")
(vec_merge:V2HI
(any_truncate:V2HI
@@ -15782,28 +15906,19 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn_and_split "avx512vl_<code>v2div2hi2_mask_store_2"
- [(set (match_operand:SI 0 "memory_operand")
- (subreg:SI
- (vec_merge:V2HI
- (any_truncate:V2HI
- (match_operand:V2DI 1 "register_operand"))
- (vec_select:V2HI
- (subreg:V4HI
- (vec_concat:V2SI
- (match_dup 0)
- (const_int 0)) 0)
- (parallel [(const_int 0) (const_int 1)]))
- (match_operand:QI 2 "register_operand")) 0))]
- "TARGET_AVX512VL && ix86_pre_reload_split ()"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (vec_merge:V2HI
- (any_truncate:V2HI (match_dup 1))
- (match_dup 0)
- (match_dup 2)))]
- "operands[0] = adjust_address_nv (operands[0], V2HImode, 0);")
+(define_expand "avx512vl_<code>v2div2hi2_mask_store_2"
+ [(match_operand:SI 0 "memory_operand")
+ (any_truncate:V2HI
+ (match_operand:V2DI 1 "register_operand"))
+ (match_operand:QI 2 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ operands[0] = adjust_address_nv (operands[0], V2HImode, 0);
+ emit_insn (gen_avx512vl_<code>v2div2hi2_mask_store_1 (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+})
(define_expand "truncv2div2si2"
[(set (match_operand:V2SI 0 "register_operand")
@@ -15923,7 +16038,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn "*avx512vl_<code>v2div2si2_mask_store_1"
+(define_insn "avx512vl_<code>v2div2si2_mask_store_1"
[(set (match_operand:V2SI 0 "memory_operand" "=m")
(vec_merge:V2SI
(any_truncate:V2SI
@@ -15937,28 +16052,19 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn_and_split "avx512vl_<code>v2div2si2_mask_store_2"
- [(set (match_operand:DI 0 "memory_operand")
- (subreg:DI
- (vec_merge:V2SI
- (any_truncate:V2SI
- (match_operand:V2DI 1 "register_operand"))
- (vec_select:V2SI
- (subreg:V4SI
- (vec_concat:V2DI
- (match_dup 0)
- (const_int 0)) 0)
- (parallel [(const_int 0) (const_int 1)]))
- (match_operand:QI 2 "register_operand")) 0))]
- "TARGET_AVX512VL && ix86_pre_reload_split ()"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (vec_merge:V2SI
- (any_truncate:V2SI (match_dup 1))
- (match_dup 0)
- (match_dup 2)))]
- "operands[0] = adjust_address_nv (operands[0], V2SImode, 0);")
+(define_expand "avx512vl_<code>v2div2si2_mask_store_2"
+ [(match_operand:DI 0 "memory_operand")
+ (any_truncate:V2SI
+ (match_operand:V2DI 1 "register_operand"))
+ (match_operand:QI 2 "register_operand")]
+ "TARGET_AVX512VL"
+{
+ operands[0] = adjust_address_nv (operands[0], V2SImode, 0);
+ emit_insn (gen_avx512vl_<code>v2div2si2_mask_store_1 (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+})
(define_expand "truncv8div8qi2"
[(set (match_operand:V8QI 0 "register_operand")
@@ -16057,7 +16163,7 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn "*avx512f_<code>v8div16qi2_mask_store_1"
+(define_insn "avx512f_<code>v8div16qi2_mask_store_1"
[(set (match_operand:V8QI 0 "memory_operand" "=m")
(vec_merge:V8QI
(any_truncate:V8QI
@@ -16071,31 +16177,19 @@
(set_attr "prefix" "evex")
(set_attr "mode" "TI")])
-(define_insn_and_split "avx512f_<code>v8div16qi2_mask_store_2"
- [(set (match_operand:DI 0 "memory_operand")
- (subreg:DI
- (vec_merge:V8QI
- (any_truncate:V8QI
- (match_operand:V8DI 1 "register_operand"))
- (vec_select:V8QI
- (subreg:V16QI
- (vec_concat:V2DI
- (match_dup 0)
- (const_int 0)) 0)
- (parallel [(const_int 0) (const_int 1)
- (const_int 2) (const_int 3)
- (const_int 4) (const_int 5)
- (const_int 6) (const_int 7)]))
- (match_operand:QI 2 "register_operand")) 0))]
- "TARGET_AVX512F && TARGET_EVEX512 && ix86_pre_reload_split ()"
- "#"
- "&& 1"
- [(set (match_dup 0)
- (vec_merge:V8QI
- (any_truncate:V8QI (match_dup 1))
- (match_dup 0)
- (match_dup 2)))]
- "operands[0] = adjust_address_nv (operands[0], V8QImode, 0);")
+(define_expand "avx512f_<code>v8div16qi2_mask_store_2"
+ [(match_operand:DI 0 "memory_operand")
+ (any_truncate:V8QI
+ (match_operand:V8DI 1 "register_operand"))
+ (match_operand:QI 2 "register_operand")]
+ "TARGET_AVX512F && TARGET_EVEX512"
+{
+ operands[0] = adjust_address_nv (operands[0], V8QImode, 0);
+ emit_insn (gen_avx512f_<code>v8div16qi2_mask_store_1 (operands[0],
+ operands[1],
+ operands[2]));
+ DONE;
+})
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;
@@ -16875,7 +16969,7 @@
(define_mode_attr SDOT_VPDP_SUF
[(V32HI "v16si") (V16HI "v8si") (V8HI "v4si")])
-(define_expand "sdot_prod<mode>"
+(define_expand "sdot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
(match_operand:VI2_AVX512VNNIBW 1 "register_operand")
(match_operand:VI2_AVX512VNNIBW 2 "register_operand")
@@ -16910,7 +17004,7 @@
;; Normally we use widen_mul_even/odd, but combine can't quite get it all
;; back together when madd is available.
-(define_expand "sdot_prodv4si"
+(define_expand "sdot_prodv2div4si"
[(match_operand:V2DI 0 "register_operand")
(match_operand:V4SI 1 "register_operand")
(match_operand:V4SI 2 "register_operand")
@@ -17849,8 +17943,8 @@
(match_operand:VI_128_256 1 "vector_all_ones_operand")
(match_operand:VI_128_256 2 "const0_operand")
(unspec:<avx512fmaskmode>
- [(match_operand:VI_128_256 3 "nonimmediate_operand")
- (match_operand:VI_128_256 4 "nonimmediate_operand")
+ [(match_operand:VI_128_256 3 "nonimm_or_0_operand")
+ (match_operand:VI_128_256 4 "nonimm_or_0_operand")
(match_operand:SI 5 "const_0_to_7_operand")]
UNSPEC_PCMP)))]
"TARGET_AVX512VL && ix86_pre_reload_split ()
@@ -17869,6 +17963,12 @@
{
if (INTVAL (operands[5]) == 1)
std::swap (operands[3], operands[4]);
+
+ if (operands[3] == CONST0_RTX (<MODE>mode))
+ operands[3] = force_reg (<MODE>mode, operands[3]);
+ if (operands[4] == CONST0_RTX (<MODE>mode))
+ operands[4] = force_reg (<MODE>mode, operands[4]);
+
enum rtx_code code = INTVAL (operands[5]) ? GT : EQ;
emit_move_insn (operands[0], gen_rtx_fmt_ee (code, <MODE>mode,
operands[3], operands[4]));
@@ -18036,7 +18136,7 @@
[(match_operand:VI_128_256 3 "nonimmediate_operand")
(match_operand:VI_128_256 4 "nonimmediate_operand")
(match_operand:SI 5 "const_0_to_7_operand")]
- UNSPEC_PCMP_ITER)))]
+ UNSPEC_PCMP)))]
"TARGET_AVX512VL && ix86_pre_reload_split ()
/* NE is commutative. */
&& (INTVAL (operands[5]) == 4
@@ -18059,6 +18159,31 @@
DONE;
})
+(define_insn_and_split "*avx2_pcmp<mode>3_8"
+ [(set (match_operand:VI_128_256 0 "register_operand")
+ (vec_merge:VI_128_256
+ (match_operand:VI_128_256 1 "const0_operand")
+ (match_operand:VI_128_256 2 "vector_all_ones_operand")
+ (unspec:<avx512fmaskmode>
+ [(match_operand:VI_128_256 3 "nonimmediate_operand")
+ (match_operand:VI_128_256 4 "nonimmediate_operand")
+ (match_operand:SI 5 "const_0_to_7_operand")]
+ UNSPEC_UNSIGNED_PCMP)))]
+ "TARGET_AVX512VL && ix86_pre_reload_split ()
+ /* NE is commutative. */
+ && INTVAL (operands[5]) == 4"
+
+ "#"
+ "&& 1"
+ [(const_int 0)]
+{
+ if (MEM_P (operands[3]))
+ operands[3] = force_reg (<MODE>mode, operands[3]);
+ emit_move_insn (operands[0], gen_rtx_fmt_ee (EQ, <MODE>mode,
+ operands[3], operands[4]));
+ DONE;
+})
+
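The new *avx2_pcmp<mode>3_8 split rests on a simple identity: selecting zero where the unsigned NE compare holds and all-ones where it does not is exactly an equality compare, so the whole vec_merge collapses into a single pcmpeq. A per-lane sketch of that identity (names are illustrative only):

#include <stdint.h>

/* vec_merge (0, ~0, a != b) in one lane equals (a == b ? -1 : 0),
   i.e. the lane result of pcmpeq.  */
static int32_t
merge_model (int32_t a, int32_t b)
{
  return (a != b) ? 0 : -1;
}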
(define_expand "<avx512>_eq<mode>3<mask_scalar_merge_name>"
[(set (match_operand:<avx512fmaskmode> 0 "register_operand")
(unspec:<avx512fmaskmode>
@@ -18330,6 +18455,13 @@
(match_operand:VI_AVX2 2 "vector_operand")))]
"TARGET_SSE2")
+(define_expand "andn<mode>3"
+ [(set (match_operand:VI 0 "register_operand")
+ (and:VI
+ (not:VI (match_operand:VI 2 "register_operand"))
+ (match_operand:VI 1 "register_operand")))]
+ "TARGET_SSE2")
+
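The new andn<mode>3 expander exposes the existing andnot patterns to the middle end; note the operand order in the RTL above, where optab operand 2 sits under the (not ...), matching the hardware convention that andnot complements its first source. The C intrinsic follows the same convention, shown here as a small hedged example (the wrapper name is illustrative):

#include <emmintrin.h>

/* _mm_andnot_si128 computes (~a) & b: the first argument is the one
   that gets complemented, just as operand 2 is the negated input of
   the expander above.  */
__m128i
andn128 (__m128i a, __m128i b)
{
  return _mm_andnot_si128 (a, b);
}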
(define_expand "<sse2_avx2>_andnot<mode>3_mask"
[(set (match_operand:VI48_AVX512VL 0 "register_operand")
(vec_merge:VI48_AVX512VL
@@ -21439,7 +21571,7 @@
movhps\t{%2, %0|%0, %q2}
vmovhps\t{%2, %1, %0|%0, %1, %q2}"
[(set_attr "isa" "sse2_noavx,avx,noavx,noavx,avx")
- (set_attr "type" "sselog,sselog,ssemov,ssemov,ssemov")
+ (set_attr "type" "sselog,sselog,ssemov,ssemov2,ssemov2")
(set_attr "prefix" "orig,maybe_evex,orig,orig,maybe_evex")
(set_attr "mode" "TI,TI,V4SF,V2SF,V2SF")])
@@ -21547,7 +21679,7 @@
(if_then_else
(eq_attr "alternative" "0,1,2,3,4,5")
(const_string "sselog")
- (const_string "ssemov")))
+ (const_string "ssemov2")))
(set (attr "addr")
(if_then_else (eq_attr "alternative" "0,1")
(const_string "gpr16")
@@ -29714,7 +29846,7 @@
UNSPEC_FPCLASS)
(const_int 1)))]
"TARGET_AVX512DQ || VALID_AVX512FP16_REG_MODE(<MODE>mode)"
- "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %1, %2}";
+ "vfpclass<ssescalarmodesuffix>\t{%2, %1, %0<mask_scalar_merge_operand3>|%0<mask_scalar_merge_operand3>, %<iptr>1, %2}";
[(set_attr "type" "sse")
(set_attr "length_immediate" "1")
(set_attr "prefix" "evex")
@@ -29950,25 +30082,29 @@
(set_attr "mode" "OI")])
(define_insn "vsm4key4_<mode>"
- [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
- (unspec:VI4_AVX
- [(match_operand:VI4_AVX 1 "register_operand" "x")
- (match_operand:VI4_AVX 2 "vector_operand" "xBm")]
+ [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=x,v")
+ (unspec:VI4_AVX10_2
+ [(match_operand:VI4_AVX10_2 1 "register_operand" "x,v")
+ (match_operand:VI4_AVX10_2 2 "vector_operand" "xBm,vBm")]
UNSPEC_SM4KEY4))]
"TARGET_SM4"
"vsm4key4\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "other")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "isa" "avx,avx10_2")
(set_attr "mode" "<sseinsnmode>")])
(define_insn "vsm4rnds4_<mode>"
- [(set (match_operand:VI4_AVX 0 "register_operand" "=x")
- (unspec:VI4_AVX
- [(match_operand:VI4_AVX 1 "register_operand" "x")
- (match_operand:VI4_AVX 2 "vector_operand" "xBm")]
+ [(set (match_operand:VI4_AVX10_2 0 "register_operand" "=x,v")
+ (unspec:VI4_AVX10_2
+ [(match_operand:VI4_AVX10_2 1 "register_operand" "x,v")
+ (match_operand:VI4_AVX10_2 2 "vector_operand" "xBm,vBm")]
UNSPEC_SM4RNDS4))]
"TARGET_SM4"
"vsm4rnds4\t{%2, %1, %0|%0, %1, %2}"
[(set_attr "type" "other")
+ (set_attr "prefix" "maybe_evex")
+ (set_attr "isa" "avx,avx10_2")
(set_attr "mode" "<sseinsnmode>")])
(define_insn_and_split "avx512f_<castmode><avxsizesuffix>_<castmode>"
@@ -30365,7 +30501,7 @@
[(set_attr ("prefix") ("evex"))
(set_attr "mode" "<sseinsnmode>")])
-(define_expand "usdot_prod<mode>"
+(define_expand "usdot_prod<ssedvecmodelower><mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
(match_operand:VI1_AVX512 1 "register_operand")
(match_operand:VI1_AVX512 2 "register_operand")
@@ -30403,10 +30539,11 @@
rtx sum = gen_reg_rtx (<ssedvecmode>mode);
emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
- op2_lo, sum));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
- op2_hi, operands[3]));
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res1,
+ op1_lo, op2_lo, sum));
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res2,
+ op1_hi, op2_hi,
+ operands[3]));
emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
}
DONE;
@@ -30696,7 +30833,7 @@
UNSPEC_VAESDEC))]
"TARGET_VAES"
{
- if (which_alternative == 0 && <MODE>mode == V16QImode)
+ if (!TARGET_AES && <MODE>mode == V16QImode)
return "%{evex%} vaesdec\t{%2, %1, %0|%0, %1, %2}";
else
return "vaesdec\t{%2, %1, %0|%0, %1, %2}";
@@ -30710,7 +30847,7 @@
UNSPEC_VAESDECLAST))]
"TARGET_VAES"
{
- if (which_alternative == 0 && <MODE>mode == V16QImode)
+ if (!TARGET_AES && <MODE>mode == V16QImode)
return "%{evex%} vaesdeclast\t{%2, %1, %0|%0, %1, %2}";
else
return "vaesdeclast\t{%2, %1, %0|%0, %1, %2}";
@@ -30724,7 +30861,7 @@
UNSPEC_VAESENC))]
"TARGET_VAES"
{
- if (which_alternative == 0 && <MODE>mode == V16QImode)
+ if (!TARGET_AES && <MODE>mode == V16QImode)
return "%{evex%} vaesenc\t{%2, %1, %0|%0, %1, %2}";
else
return "vaesenc\t{%2, %1, %0|%0, %1, %2}";
@@ -30738,7 +30875,7 @@
UNSPEC_VAESENCLAST))]
"TARGET_VAES"
{
- if (which_alternative == 0 && <MODE>mode == V16QImode)
+ if (!TARGET_AES && <MODE>mode == V16QImode)
return "%{evex%} vaesenclast\t{%2, %1, %0|%0, %1, %2}";
else
return "vaesenclast\t{%2, %1, %0|%0, %1, %2}";
@@ -31230,14 +31367,15 @@
(UNSPEC_VPDPBSUD "bsud") (UNSPEC_VPDPBSUDS "bsuds")
(UNSPEC_VPDPBUUD "buud") (UNSPEC_VPDPBUUDS "buuds")])
-(define_expand "sdot_prod<mode>"
+(define_expand "sdot_prod<ssedvecmodelower><mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
- (match_operand:VI1_AVX2 1 "register_operand")
- (match_operand:VI1_AVX2 2 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 1 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 2 "register_operand")
(match_operand:<ssedvecmode> 3 "register_operand")]
"TARGET_SSE2"
{
- if (TARGET_AVXVNNIINT8)
+ if ((<MODE_SIZE> == 64 && TARGET_AVX10_2_512)
+ || (<MODE_SIZE> < 64 && (TARGET_AVXVNNIINT8 || TARGET_AVX10_2_256)))
{
operands[1] = lowpart_subreg (<ssedvecmode>mode,
force_reg (<MODE>mode, operands[1]),
@@ -31266,54 +31404,26 @@
rtx sum = gen_reg_rtx (<ssedvecmode>mode);
emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
- op2_lo, sum));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
- op2_hi, operands[3]));
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res1,
+ op1_lo, op2_lo, sum));
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res2,
+ op1_hi, op2_hi,
+ operands[3]));
emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
}
DONE;
})
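Both the VNNI path and the fallback above implement the same widening dot-product optab: each 32-bit accumulator lane receives the sum of four signed byte products. The fallback gets there by sign-extending the bytes to 16 bits and reusing the 16-bit sdot_prod pattern on the low and high halves before adding the partial sums. A scalar reference for the computation, as a sketch only (the function name and its array-plus-count interface are illustrative, not part of the patch):

#include <stdint.h>

/* out[i] = acc[i] + sum over k = 0..3 of a[4*i+k] * b[4*i+k], with the
   products and the accumulation done in widened 32-bit signed
   arithmetic.  */
void
sdot_prod_ref (int32_t *out, const int8_t *a, const int8_t *b,
	       const int32_t *acc, int lanes)
{
  for (int i = 0; i < lanes; i++)
    {
      int32_t sum = acc[i];
      for (int k = 0; k < 4; k++)
	sum += (int32_t) a[4 * i + k] * (int32_t) b[4 * i + k];
      out[i] = sum;
    }
}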
-(define_expand "sdot_prodv64qi"
- [(match_operand:V16SI 0 "register_operand")
- (match_operand:V64QI 1 "register_operand")
- (match_operand:V64QI 2 "register_operand")
- (match_operand:V16SI 3 "register_operand")]
- "(TARGET_AVX512VNNI || TARGET_AVX512BW) && TARGET_EVEX512"
-{
- /* Emulate with vpdpwssd. */
- rtx op1_lo = gen_reg_rtx (V32HImode);
- rtx op1_hi = gen_reg_rtx (V32HImode);
- rtx op2_lo = gen_reg_rtx (V32HImode);
- rtx op2_hi = gen_reg_rtx (V32HImode);
-
- emit_insn (gen_vec_unpacks_lo_v64qi (op1_lo, operands[1]));
- emit_insn (gen_vec_unpacks_lo_v64qi (op2_lo, operands[2]));
- emit_insn (gen_vec_unpacks_hi_v64qi (op1_hi, operands[1]));
- emit_insn (gen_vec_unpacks_hi_v64qi (op2_hi, operands[2]));
-
- rtx res1 = gen_reg_rtx (V16SImode);
- rtx res2 = gen_reg_rtx (V16SImode);
- rtx sum = gen_reg_rtx (V16SImode);
-
- emit_move_insn (sum, CONST0_RTX (V16SImode));
- emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
- emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
-
- emit_insn (gen_addv16si3 (operands[0], res1, res2));
- DONE;
-})
-
-(define_expand "udot_prod<mode>"
+(define_expand "udot_prod<ssedvecmodelower><mode>"
[(match_operand:<ssedvecmode> 0 "register_operand")
- (match_operand:VI1_AVX2 1 "register_operand")
- (match_operand:VI1_AVX2 2 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 1 "register_operand")
+ (match_operand:VI1_AVX512VNNIBW 2 "register_operand")
(match_operand:<ssedvecmode> 3 "register_operand")]
"TARGET_SSE2"
{
- if (TARGET_AVXVNNIINT8)
+ if ((<MODE_SIZE> == 64 && TARGET_AVX10_2_512)
+ || (<MODE_SIZE> < 64 && (TARGET_AVXVNNIINT8 || TARGET_AVX10_2_256)))
{
operands[1] = lowpart_subreg (<ssedvecmode>mode,
force_reg (<MODE>mode, operands[1]),
@@ -31342,46 +31452,17 @@
rtx sum = gen_reg_rtx (<ssedvecmode>mode);
emit_move_insn (sum, CONST0_RTX (<ssedvecmode>mode));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res1, op1_lo,
- op2_lo, sum));
- emit_insn (gen_sdot_prod<sseunpackmodelower> (res2, op1_hi,
- op2_hi, operands[3]));
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res1,
+ op1_lo, op2_lo, sum));
+ emit_insn (gen_sdot_prod<ssedvecmodelower><sseunpackmodelower> (res2,
+ op1_hi, op2_hi,
+ operands[3]));
emit_insn (gen_add<ssedvecmodelower>3 (operands[0], res1, res2));
}
DONE;
})
-(define_expand "udot_prodv64qi"
- [(match_operand:V16SI 0 "register_operand")
- (match_operand:V64QI 1 "register_operand")
- (match_operand:V64QI 2 "register_operand")
- (match_operand:V16SI 3 "register_operand")]
- "(TARGET_AVX512VNNI || TARGET_AVX512BW) && TARGET_EVEX512"
-{
- /* Emulate with vpdpwssd. */
- rtx op1_lo = gen_reg_rtx (V32HImode);
- rtx op1_hi = gen_reg_rtx (V32HImode);
- rtx op2_lo = gen_reg_rtx (V32HImode);
- rtx op2_hi = gen_reg_rtx (V32HImode);
-
- emit_insn (gen_vec_unpacku_lo_v64qi (op1_lo, operands[1]));
- emit_insn (gen_vec_unpacku_lo_v64qi (op2_lo, operands[2]));
- emit_insn (gen_vec_unpacku_hi_v64qi (op1_hi, operands[1]));
- emit_insn (gen_vec_unpacku_hi_v64qi (op2_hi, operands[2]));
-
- rtx res1 = gen_reg_rtx (V16SImode);
- rtx res2 = gen_reg_rtx (V16SImode);
- rtx sum = gen_reg_rtx (V16SImode);
-
- emit_move_insn (sum, CONST0_RTX (V16SImode));
- emit_insn (gen_sdot_prodv32hi (res1, op1_lo, op2_lo, sum));
- emit_insn (gen_sdot_prodv32hi (res2, op1_hi, op2_hi, operands[3]));
-
- emit_insn (gen_addv16si3 (operands[0], res1, res2));
- DONE;
-})
-
(define_insn "vpdp<vpdotprodtype>_<mode>"
[(set (match_operand:VI4_AVX 0 "register_operand" "=v")
(unspec:VI4_AVX
@@ -31755,12 +31836,12 @@
(UNSPEC_VPDPWSUD "wsud") (UNSPEC_VPDPWSUDS "wsuds")
(UNSPEC_VPDPWUUD "wuud") (UNSPEC_VPDPWUUDS "wuuds")])
-(define_expand "usdot_prod<mode>"
+(define_expand "usdot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX2 1 "register_operand")
- (match_operand:VI2_AVX2 2 "register_operand")
+ (match_operand:VI2_AVX10_2 1 "register_operand")
+ (match_operand:VI2_AVX10_2 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
- "TARGET_AVXVNNIINT16"
+ "TARGET_AVXVNNIINT16 || TARGET_AVX10_2_256"
{
operands[1] = lowpart_subreg (<sseunpackmode>mode,
force_reg (<MODE>mode, operands[1]),
@@ -31773,12 +31854,12 @@
DONE;
})
-(define_expand "udot_prod<mode>"
+(define_expand "udot_prod<sseunpackmodelower><mode>"
[(match_operand:<sseunpackmode> 0 "register_operand")
- (match_operand:VI2_AVX2 1 "register_operand")
- (match_operand:VI2_AVX2 2 "register_operand")
+ (match_operand:VI2_AVX10_2 1 "register_operand")
+ (match_operand:VI2_AVX10_2 2 "register_operand")
(match_operand:<sseunpackmode> 3 "register_operand")]
- "TARGET_AVXVNNIINT16"
+ "TARGET_AVXVNNIINT16 || TARGET_AVX10_2_256"
{
operands[1] = lowpart_subreg (<sseunpackmode>mode,
force_reg (<MODE>mode, operands[1]),
@@ -31916,6 +31997,13 @@
"vscalefpbf16\t{%2, %1, %0<mask_operand3>|%0<mask_operand3>, %1, %2}"
[(set_attr "prefix" "evex")])
+(define_expand "<code><mode>3"
+ [(set (match_operand:VBF_AVX10_2 0 "register_operand")
+ (smaxmin:VBF_AVX10_2
+ (match_operand:VBF_AVX10_2 1 "register_operand")
+ (match_operand:VBF_AVX10_2 2 "nonimmediate_operand")))]
+ "TARGET_AVX10_2_256")
+
(define_insn "avx10_2_<code>pbf16_<mode><mask_name>"
[(set (match_operand:VBF_AVX10_2 0 "register_operand" "=v")
(smaxmin:VBF_AVX10_2
@@ -31988,7 +32076,7 @@
(fma:VBF_AVX10_2
(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")
(match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (match_operand:VBF_AVX10_2 3 "register_operand" "0"))
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2_256"
@@ -32053,7 +32141,7 @@
(neg:VBF_AVX10_2
(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v"))
(match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
- (match_operand:VBF_AVX10_2 3 "register_operand" "0"))
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0"))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2_256"
@@ -32118,7 +32206,7 @@
(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v")
(match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
(neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "register_operand" "0")))
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2_256"
@@ -32186,7 +32274,7 @@
(match_operand:VBF_AVX10_2 1 "nonimmediate_operand" "%v"))
(match_operand:VBF_AVX10_2 2 "nonimmediate_operand" "vm")
(neg:VBF_AVX10_2
- (match_operand:VBF_AVX10_2 3 "register_operand" "0")))
+ (match_operand:VBF_AVX10_2 3 "nonimmediate_operand" "0")))
(match_dup 3)
(match_operand:<avx512fmaskmode> 4 "register_operand" "Yk")))]
"TARGET_AVX10_2_256"
@@ -32274,8 +32362,12 @@
(define_insn "avx10_2_comsbf16_v8bf"
[(set (reg:CCFP FLAGS_REG)
(unspec:CCFP
- [(match_operand:V8BF 0 "register_operand" "v")
- (match_operand:V8BF 1 "nonimmediate_operand" "vm")]
+ [(vec_select:BF
+ (match_operand:V8BF 0 "register_operand" "v")
+ (parallel [(const_int 0)]))
+ (vec_select:BF
+ (match_operand:V8BF 1 "nonimmediate_operand" "vm")
+ (parallel [(const_int 0)]))]
UNSPEC_VCOMSBF16))]
"TARGET_AVX10_2_256"
"vcomsbf16\t{%1, %0|%0, %1}"
@@ -32485,3 +32577,15 @@
"vminmax<ssescalarmodesuffix>\t{%3, <round_saeonly_scalar_mask_op4>%2, %1, %0<mask_scalar_operand4>|%0<mask_scalar_operand4>, %1, %2<round_saeonly_scalar_mask_op4>, %3}"
[(set_attr "prefix" "evex")
(set_attr "mode" "<ssescalarmode>")])
+
+(define_insn "avx10_2_vmovrs<ssemodesuffix><mode><mask_name>"
+ [(set (match_operand:VI1248_AVX10_2 0 "register_operand" "=v")
+ (unspec:VI1248_AVX10_2
+ [(match_operand:VI1248_AVX10_2 1 "memory_operand" "m")]
+ UNSPEC_VMOVRS))]
+ "TARGET_AVX10_2_256 && TARGET_MOVRS"
+ "vmovrs<ssemodesuffix>\t{%1, %0<mask_operand2>|%0<mask_operand2>, %1}"
+ [(set_attr "type" "ssemov")
+ (set_attr "prefix" "evex")
+ (set_attr "memory" "load")
+ (set_attr "mode" "<sseinsnmode>")])
diff --git a/gcc/config/i386/stringop.def b/gcc/config/i386/stringop.def
index 07de314..7de514d 100644
--- a/gcc/config/i386/stringop.def
+++ b/gcc/config/i386/stringop.def
@@ -13,7 +13,7 @@ but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
-You should have received a copy of the GNU General Public License
+You should have received a copy of the GNU General Public License
along with GCC; see the files COPYING3. If not,
see <http://www.gnu.org/licenses/>. */
diff --git a/gcc/config/i386/subst.md b/gcc/config/i386/subst.md
index ca53413..3722b0d 100644
--- a/gcc/config/i386/subst.md
+++ b/gcc/config/i386/subst.md
@@ -366,6 +366,8 @@
(define_subst_attr "mask_scalarcz_operand4" "mask_scalarcz" "" "%{%5%}%N4")
(define_subst_attr "mask_scalar4_dest_false_dep_for_glc_cond" "mask_scalar" "1" "operands[4] == CONST0_RTX(<MODE>mode)")
(define_subst_attr "mask_scalarc_dest_false_dep_for_glc_cond" "mask_scalarc" "1" "operands[3] == CONST0_RTX(V8HFmode)")
+(define_subst_attr "mask_scalar_operand_arg34" "mask_scalar" "" ", operands[3], operands[4]")
+(define_subst_attr "mask_scalar_expand_op3" "mask_scalar" "3" "5")
(define_subst "mask_scalar"
[(set (match_operand:SUBST_V 0)
@@ -473,6 +475,7 @@
(define_subst_attr "round_saeonly_scalar_constraint" "round_saeonly_scalar" "vm" "v")
(define_subst_attr "round_saeonly_scalar_prefix" "round_saeonly_scalar" "vex" "evex")
(define_subst_attr "round_saeonly_scalar_nimm_predicate" "round_saeonly_scalar" "nonimmediate_operand" "register_operand")
+(define_subst_attr "round_saeonly_scalar_mask_arg3" "round_saeonly_scalar" "" ", operands[<mask_scalar_expand_op3>]")
(define_subst "round_saeonly_scalar"
[(set (match_operand:SUBST_V 0)
diff --git a/gcc/config/i386/sync.md b/gcc/config/i386/sync.md
index f2b3ba0..f03d418 100644
--- a/gcc/config/i386/sync.md
+++ b/gcc/config/i386/sync.md
@@ -170,7 +170,7 @@
if (<MODE>mode == DImode && !TARGET_64BIT)
emit_insn (gen_atomic_loaddi_fpu
(operands[0], operands[1],
- assign_386_stack_local (DImode, SLOT_TEMP)));
+ assign_stack_temp (DImode, GET_MODE_SIZE (DImode))));
else
{
rtx dst = operands[0];
@@ -251,7 +251,7 @@
out to be significantly larger than this plus a barrier. */
emit_insn (gen_atomic_storedi_fpu
(operands[0], operands[1],
- assign_386_stack_local (DImode, SLOT_TEMP)));
+ assign_stack_temp (DImode, GET_MODE_SIZE (DImode))));
}
else
{
diff --git a/gcc/config/i386/wmmintrin.h b/gcc/config/i386/wmmintrin.h
index 34ddd3e..c9ebbb9 100644
--- a/gcc/config/i386/wmmintrin.h
+++ b/gcc/config/i386/wmmintrin.h
@@ -38,17 +38,17 @@
#define __DISABLE_AES__
#endif /* __AES__ */
-/* Performs 1 round of AES decryption of the first m128i using
+/* Performs 1 round of AES decryption of the first m128i using
the second m128i as a round key. */
#define _mm_aesdec_si128(X, Y) \
(__m128i) __builtin_ia32_aesdec128 ((__v2di) (X), (__v2di) (Y))
-/* Performs the last round of AES decryption of the first m128i
+/* Performs the last round of AES decryption of the first m128i
using the second m128i as a round key. */
#define _mm_aesdeclast_si128(X, Y) \
(__m128i) __builtin_ia32_aesdeclast128 ((__v2di) (X), (__v2di) (Y))
-/* Performs 1 round of AES encryption of the first m128i using
+/* Performs 1 round of AES encryption of the first m128i using
the second m128i as a round key. */
#define _mm_aesenc_si128(X, Y) \
(__m128i) __builtin_ia32_aesenc128 ((__v2di) (X), (__v2di) (Y))
@@ -58,7 +58,7 @@
#define _mm_aesenclast_si128(X, Y) \
(__m128i) __builtin_ia32_aesenclast128 ((__v2di) (X), (__v2di) (Y))
-/* Performs the InverseMixColumn operation on the source m128i
+/* Performs the InverseMixColumn operation on the source m128i
and stores the result into m128i destination. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_aesimc_si128 (__m128i __X)
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
index 2bfaee5..1b3227a 100644
--- a/gcc/config/i386/x86-tune-costs.h
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -2034,6 +2034,7 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (1), /* cost of a lea instruction. */
COSTS_N_INSNS (1), /* variable shift costs. */
COSTS_N_INSNS (1), /* constant shift costs. */
+ /* mul has latency 3, executes in 3 integer units. */
{COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
COSTS_N_INSNS (3), /* HI. */
COSTS_N_INSNS (3), /* SI. */
@@ -2041,6 +2042,8 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (3)}, /* other. */
0, /* cost of multiply per each bit
set. */
+ /* Integer divide has a latency of 8 cycles
+ plus 1 for every 9 bits of quotient. */
{COSTS_N_INSNS (10), /* cost of a divide/mod for QI. */
COSTS_N_INSNS (11), /* HI. */
COSTS_N_INSNS (13), /* SI. */
@@ -2048,7 +2051,7 @@ struct processor_costs znver5_cost = {
COSTS_N_INSNS (16)}, /* other. */
COSTS_N_INSNS (1), /* cost of movsx. */
COSTS_N_INSNS (1), /* cost of movzx. */
- 8, /* "large" insn. */
+ 15, /* "large" insn. */
9, /* MOVE_RATIO. */
6, /* CLEAR_RATIO */
{6, 6, 6}, /* cost of loading integer registers
@@ -2065,12 +2068,13 @@ struct processor_costs znver5_cost = {
2, 2, 2, /* cost of moving XMM,YMM,ZMM
register. */
6, /* cost of moving SSE register to integer. */
- /* VGATHERDPD is 17 uops and throughput is 4, VGATHERDPS is 24 uops,
- throughput 5. Approx 7 uops do not depend on vector size and every load
- is 5 uops. */
+
+ /* TODO: gather and scatter instructions are currently disabled in
+ x86-tune.def.  In some cases they are, however, a win; see PR116582.
+ We still need a good cost model for them. */
14, 10, /* Gather load static, per_elt. */
14, 20, /* Gather store static, per_elt. */
- 32, /* size of l1 cache. */
+ 48, /* size of l1 cache. */
1024, /* size of l2 cache. */
64, /* size of prefetch block. */
/* New AMD processors never drop prefetches; if they cannot be performed
@@ -2080,6 +2084,8 @@ struct processor_costs znver5_cost = {
time). */
100, /* number of parallel prefetches. */
3, /* Branch cost. */
+ /* TODO x87 latencies are still based on znver4.
+ Probably not very important these days. */
COSTS_N_INSNS (7), /* cost of FADD and FSUB insns. */
COSTS_N_INSNS (7), /* cost of FMUL instruction. */
/* Latency of fdiv is 8-15. */
@@ -2089,27 +2095,38 @@ struct processor_costs znver5_cost = {
/* Latency of fsqrt is 4-10. */
COSTS_N_INSNS (25), /* cost of FSQRT instruction. */
+ /* SSE instructions have typical throughput 4 and latency 1. */
COSTS_N_INSNS (1), /* cost of cheap SSE instruction. */
- COSTS_N_INSNS (3), /* cost of ADDSS/SD SUBSS/SD insns. */
+ /* ADDSS has throughput 2 and latency 2
+ (in some cases, when the source is another addition). */
+ COSTS_N_INSNS (2), /* cost of ADDSS/SD SUBSS/SD insns. */
+ /* MULSS has throughput 2 and latency 3. */
COSTS_N_INSNS (3), /* cost of MULSS instruction. */
COSTS_N_INSNS (3), /* cost of MULSD instruction. */
+ /* FMA has throughput 2 and latency 4. */
COSTS_N_INSNS (4), /* cost of FMA SS instruction. */
COSTS_N_INSNS (4), /* cost of FMA SD instruction. */
+ /* DIVSS has throughput 0.4 and latency 10. */
COSTS_N_INSNS (10), /* cost of DIVSS instruction. */
- /* 9-13. */
+ /* DIVSD has throughput 0.25 and latency 13. */
COSTS_N_INSNS (13), /* cost of DIVSD instruction. */
+ /* SQRTSS has throughput 0.22 and latency 14. */
COSTS_N_INSNS (14), /* cost of SQRTSS instruction. */
+ /* SQRTSD has throughput 0.13 and latency 20. */
COSTS_N_INSNS (20), /* cost of SQRTSD instruction. */
- /* Zen can execute 4 integer operations per cycle. FP operations
- take 3 cycles and it can execute 2 integer additions and 2
- multiplications thus reassociation may make sense up to with of 6.
- SPEC2k6 bencharks suggests
- that 4 works better than 6 probably due to register pressure.
-
- Integer vector operations are taken by FP unit and execute 3 vector
- plus/minus operations per cycle but only one multiply. This is adjusted
- in ix86_reassociation_width. */
- 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ /* Zen5 can execute:
+ - integer ops: 6 per cycle, at most 3 multiplications.
+ latency 1 for additions, 3 for multiplications (pipelined)
+
+ Setting a width of 9 for multiplications is probably excessive
+ given register pressure.
+ - fp ops: 2 additions per cycle, latency 2-3
+ 2 multiplications per cycle, latency 3
+ - vector integer ops: 4 additions, latency 1
+ 2 multiplications, latency 4
+ We increase width to 6 for multiplications
+ in ix86_reassociation_width. */
+ 6, 6, 4, 6, /* reassoc int, fp, vec_int, vec_fp. */
znver2_memcpy,
znver2_memset,
COSTS_N_INSNS (4), /* cond_taken_branch_cost. */
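For reference, the latency figures quoted in the comments above are scaled into RTX cost units by COSTS_N_INSNS; a minimal sketch of that mapping (the macro is defined in gcc/rtl.h, not in this patch):

    #define COSTS_N_INSNS(N)  ((N) * 4)

    /* So COSTS_N_INSNS (10) for DIVSS models its ~10-cycle latency as 40
       cost units, while COSTS_N_INSNS (1) for a cheap SSE insn is 4.  */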
diff --git a/gcc/config/i386/x86-tune-sched.cc b/gcc/config/i386/x86-tune-sched.cc
index d77298b..4ebdf11 100644
--- a/gcc/config/i386/x86-tune-sched.cc
+++ b/gcc/config/i386/x86-tune-sched.cc
@@ -67,7 +67,6 @@ ix86_issue_rate (void)
case PROCESSOR_ZNVER2:
case PROCESSOR_ZNVER3:
case PROCESSOR_ZNVER4:
- case PROCESSOR_ZNVER5:
case PROCESSOR_CORE2:
case PROCESSOR_NEHALEM:
case PROCESSOR_SANDYBRIDGE:
@@ -91,6 +90,13 @@ ix86_issue_rate (void)
return 5;
case PROCESSOR_SAPPHIRERAPIDS:
+ /* For znver5 the decoder can handle 4 or 8 instructions per cycle,
+ the op cache 12 instructions/cycle, dispatch 8 instructions,
+ integer rename 8 instructions and FP 6 instructions.
+
+ The scheduler, without understanding the out-of-order nature of the CPU,
+ is unlikely to be able to fill all of these. */
+ case PROCESSOR_ZNVER5:
return 6;
default:
@@ -434,6 +440,8 @@ ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
enum attr_unit unit = get_attr_unit (insn);
int loadcost;
+ /* TODO: On znver5 complex addressing modes have
+ greater latency. */
if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
loadcost = 4;
else
@@ -563,6 +571,60 @@ ix86_macro_fusion_p ()
return TARGET_FUSE_CMP_AND_BRANCH;
}
+static bool
+ix86_fuse_mov_alu_p (rtx_insn *mov, rtx_insn *alu)
+{
+ /* Validate mov:
+ - It should be a reg-reg move with opcode 0x89 or 0x8B. */
+ rtx set1 = PATTERN (mov);
+ if (GET_CODE (set1) != SET
+ || !GENERAL_REG_P (SET_SRC (set1))
+ || !GENERAL_REG_P (SET_DEST (set1)))
+ return false;
+ rtx reg = SET_DEST (set1);
+ /* - It should have opcode 0x89 or 0x8B. */
+ if (!INTEGRAL_MODE_P (GET_MODE (reg))
+ || GET_MODE_SIZE (GET_MODE (reg)) < 2
+ || GET_MODE_SIZE (GET_MODE (reg)) > 8)
+ return false;
+ /* Validate ALU. */
+ if (GET_CODE (PATTERN (alu)) != PARALLEL)
+ return false;
+ rtx set2 = XVECEXP (PATTERN (alu), 0, 0);
+ if (GET_CODE (set2) != SET)
+ return false;
+ /* Match one of:
+ ADD ADC AND XOR OR SUB SBB INC DEC NOT SAL SHL SHR SAR.
+ We may also add an insn attribute to handle some sporadic
+ cases where we output those with different RTX expressions. */
+
+ if (GET_CODE (SET_SRC (set2)) != PLUS
+ && GET_CODE (SET_SRC (set2)) != MINUS
+ && GET_CODE (SET_SRC (set2)) != XOR
+ && GET_CODE (SET_SRC (set2)) != AND
+ && GET_CODE (SET_SRC (set2)) != IOR
+ && GET_CODE (SET_SRC (set2)) != NOT
+ && GET_CODE (SET_SRC (set2)) != ASHIFT
+ && GET_CODE (SET_SRC (set2)) != ASHIFTRT
+ && GET_CODE (SET_SRC (set2)) != LSHIFTRT)
+ return false;
+ rtx op0 = XEXP (SET_SRC (set2), 0);
+ rtx op1 = GET_CODE (SET_SRC (set2)) != NOT ? XEXP (SET_SRC (set2), 1) : NULL;
+ /* One of the operands should be the move destination register. */
+ if (op1 && (!REG_P (op0) || REGNO (op0) != REGNO (reg)))
+ std::swap (op0, op1);
+ if (!REG_P (op0) || REGNO (op0) != REGNO (reg))
+ return false;
+ if (op1
+ && !REG_P (op1)
+ && !x86_64_immediate_operand (op1, VOIDmode))
+ return false;
+ /* Only one of the two operands may be the move destination. */
+ if (op1 && REG_P (op1) && REGNO (op1) == REGNO (reg))
+ return false;
+ return true;
+}
+
/* Check whether current microarchitecture support macro fusion
for insn pair "CONDGEN + CONDJMP". Refer to
"Intel Architectures Optimization Reference Manual". */
@@ -570,6 +632,9 @@ ix86_macro_fusion_p ()
bool
ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
{
+ if (TARGET_FUSE_MOV_AND_ALU
+ && ix86_fuse_mov_alu_p (condgen, condjmp))
+ return true;
rtx src, dest;
enum rtx_code ccode;
rtx compare_set = NULL_RTX, test_if, cond;
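As a concrete sketch (not taken from the patch), an x86-64 instruction pair that ix86_fuse_mov_alu_p accepts, and one it rejects, assuming AT&T syntax:

    /* Fusible pair kept adjacent by the scheduler:

         movq  %rsi, %rax      reg-reg move, destination %rax
         addq  %rdi, %rax      ALU op whose matched source is %rax

       A pair such as "movq %rsi, %rax; addq %rax, %rax" is rejected by
       the final check, because both ALU source operands would then be
       the move destination.  */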
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 3d29bff..6ebb2fd 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -143,10 +143,18 @@ DEF_TUNE (X86_TUNE_FUSE_CMP_AND_BRANCH_SOFLAGS, "fuse_cmp_and_branch_soflags",
/* X86_TUNE_FUSE_ALU_AND_BRANCH: Fuse alu with a subsequent conditional
jump instruction when the alu instruction produces the CCFLAG consumed by
- the conditional jump instruction. */
+ the conditional jump instruction.
+
+ TODO: znver5 supports fusing with SUB, ADD, INC, DEC, OR and AND.
+ There are also limitations on the immediates and displacements supported. */
DEF_TUNE (X86_TUNE_FUSE_ALU_AND_BRANCH, "fuse_alu_and_branch",
- m_SANDYBRIDGE | m_CORE_AVX2 | m_ZHAOXIN | m_GENERIC)
+ m_SANDYBRIDGE | m_CORE_AVX2 | m_ZHAOXIN | m_GENERIC | m_ZNVER5)
+/* X86_TUNE_FUSE_MOV_AND_ALU: Fuse a reg-reg mov with a subsequent alu when
+ the mov destination is used by the alu.  The alu must be one of
+ ADD, ADC, AND, XOR, OR, SUB, SBB, INC, DEC, NOT, SAL, SHL, SHR, SAR. */
+DEF_TUNE (X86_TUNE_FUSE_MOV_AND_ALU, "fuse_mov_and_alu",
+ m_ZNVER5 | m_GRANITERAPIDS | m_GRANITERAPIDS_D)
/*****************************************************************************/
/* Function prologue, epilogue and function calling sequences. */
@@ -476,55 +484,64 @@ DEF_TUNE (X86_TUNE_AVOID_4BYTE_PREFIXES, "avoid_4byte_prefixes",
/* X86_TUNE_USE_GATHER_2PARTS: Use gather instructions for vectors with 2
elements. */
DEF_TUNE (X86_TUNE_USE_GATHER_2PARTS, "use_gather_2parts",
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_CORE_HYBRID
+ ~(m_ZNVER | m_CORE_HYBRID
| m_YONGFENG | m_SHIJIDADAO | m_CORE_ATOM | m_GENERIC | m_GDS))
/* X86_TUNE_USE_SCATTER_2PARTS: Use scater instructions for vectors with 2
elements. */
DEF_TUNE (X86_TUNE_USE_SCATTER_2PARTS, "use_scatter_2parts",
- ~(m_ZNVER4))
+ ~(m_ZNVER4 | m_ZNVER5))
/* X86_TUNE_USE_GATHER_4PARTS: Use gather instructions for vectors with 4
elements. */
DEF_TUNE (X86_TUNE_USE_GATHER_4PARTS, "use_gather_4parts",
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_CORE_HYBRID
+ ~(m_ZNVER | m_CORE_HYBRID
| m_YONGFENG | m_SHIJIDADAO | m_CORE_ATOM | m_GENERIC | m_GDS))
/* X86_TUNE_USE_SCATTER_4PARTS: Use scater instructions for vectors with 4
elements. */
DEF_TUNE (X86_TUNE_USE_SCATTER_4PARTS, "use_scatter_4parts",
- ~(m_ZNVER4))
+ ~(m_ZNVER4 | m_ZNVER5))
/* X86_TUNE_USE_GATHER: Use gather instructions for vectors with 8 or more
elements. */
DEF_TUNE (X86_TUNE_USE_GATHER_8PARTS, "use_gather_8parts",
- ~(m_ZNVER1 | m_ZNVER2 | m_ZNVER4 | m_CORE_HYBRID | m_CORE_ATOM
+ ~(m_ZNVER | m_CORE_HYBRID | m_CORE_ATOM
| m_YONGFENG | m_SHIJIDADAO | m_GENERIC | m_GDS))
/* X86_TUNE_USE_SCATTER: Use scater instructions for vectors with 8 or more
elements. */
DEF_TUNE (X86_TUNE_USE_SCATTER_8PARTS, "use_scatter_8parts",
- ~(m_ZNVER4))
+ ~(m_ZNVER4 | m_ZNVER5))
/* X86_TUNE_AVOID_128FMA_CHAINS: Avoid creating loops with tight 128bit or
smaller FMA chain. */
-DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER1 | m_ZNVER2 | m_ZNVER3 | m_ZNVER4
+DEF_TUNE (X86_TUNE_AVOID_128FMA_CHAINS, "avoid_fma_chains", m_ZNVER
| m_YONGFENG | m_SHIJIDADAO | m_GENERIC)
/* X86_TUNE_AVOID_256FMA_CHAINS: Avoid creating loops with tight 256bit or
smaller FMA chain. */
-DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains", m_ZNVER2 | m_ZNVER3 | m_ZNVER4
- | m_CORE_HYBRID | m_SAPPHIRERAPIDS | m_CORE_ATOM | m_GENERIC)
+DEF_TUNE (X86_TUNE_AVOID_256FMA_CHAINS, "avoid_fma256_chains",
+ m_ZNVER2 | m_ZNVER3 | m_ZNVER4 | m_ZNVER5 | m_CORE_HYBRID
+ | m_SAPPHIRERAPIDS | m_CORE_ATOM | m_GENERIC)
/* X86_TUNE_AVOID_512FMA_CHAINS: Avoid creating loops with tight 512bit or
smaller FMA chain. */
-DEF_TUNE (X86_TUNE_AVOID_512FMA_CHAINS, "avoid_fma512_chains", m_NONE)
+DEF_TUNE (X86_TUNE_AVOID_512FMA_CHAINS, "avoid_fma512_chains", m_ZNVER5)
/* X86_TUNE_V2DF_REDUCTION_PREFER_PHADDPD: Prefer haddpd
for v2df vector reduction. */
DEF_TUNE (X86_TUNE_V2DF_REDUCTION_PREFER_HADDPD,
"v2df_reduction_prefer_haddpd", m_NONE)
+/* X86_TUNE_SSE_MOVCC_USE_BLENDV: Prefer blendv instructions to the
+ 3-instruction sequence (op1 & mask) | (op2 & ~mask)
+ for vector conditional moves.
+ On Crestmont, 4-operand vex blendv instructions come from the MSROM,
+ which is slow. */
+DEF_TUNE (X86_TUNE_SSE_MOVCC_USE_BLENDV,
+ "sse_movcc_use_blendv", ~m_CORE_ATOM)
+
/*****************************************************************************/
/* AVX instruction selection tuning (some of SSE flags affects AVX, too) */
/*****************************************************************************/
@@ -541,7 +558,7 @@ DEF_TUNE (X86_TUNE_AVX256_UNALIGNED_STORE_OPTIMAL, "256_unaligned_store_optimal"
/* X86_TUNE_AVX256_SPLIT_REGS: if true, AVX256 ops are split into two AVX128 ops. */
DEF_TUNE (X86_TUNE_AVX256_SPLIT_REGS, "avx256_split_regs",m_BDVER | m_BTVER2
- | m_ZNVER1)
+ | m_ZNVER1 | m_CORE_ATOM)
/* X86_TUNE_AVX128_OPTIMAL: Enable 128-bit AVX instruction generation for
the auto-vectorizer. */
@@ -552,6 +569,11 @@ DEF_TUNE (X86_TUNE_AVX128_OPTIMAL, "avx128_optimal", m_BDVER | m_BTVER2
instructions in the auto-vectorizer. */
DEF_TUNE (X86_TUNE_AVX256_OPTIMAL, "avx256_optimal", m_CORE_AVX512)
+/* X86_TUNE_AVX256_AVOID_VEC_PERM: Avoid using 256-bit cross-lane
+ vector permutation instructions in the auto-vectorizer. */
+DEF_TUNE (X86_TUNE_AVX256_AVOID_VEC_PERM,
+ "avx256_avoid_vec_perm", m_CORE_ATOM)
+
/* X86_TUNE_AVX256_SPLIT_REGS: if true, AVX512 ops are split into two AVX256 ops. */
DEF_TUNE (X86_TUNE_AVX512_SPLIT_REGS, "avx512_split_regs", m_ZNVER4)
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 7f10f96..0864b2b 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -38,6 +38,7 @@ enum _mm_hint
{
_MM_HINT_IT0 = 19,
_MM_HINT_IT1 = 18,
+ _MM_HINT_RST2 = 9,
/* _MM_HINT_ET is _MM_HINT_T with set 3rd bit. */
_MM_HINT_ET0 = 7,
_MM_HINT_T0 = 3,
@@ -52,12 +53,12 @@ enum _mm_hint
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_prefetch (const void *__P, enum _mm_hint __I)
{
- __builtin_ia32_prefetch (__P, (__I & 0x4) >> 2,
+ __builtin_ia32_prefetch (__P, (__I & 0xC) >> 2,
__I & 0x3, (__I & 0x10) >> 4);
}
#else
#define _mm_prefetch(P, I) \
- __builtin_ia32_prefetch ((P), ((I) & 0x4) >> 2, ((I) & 0x3), ((I) & 0x10) >> 4)
+ __builtin_ia32_prefetch ((P), ((I) & 0xC) >> 2, ((I) & 0x3), ((I) & 0x10) >> 4)
#endif
#ifndef __SSE__
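A worked decomposition (illustration only) of why the mask grows from 0x4 to 0xC: the new _MM_HINT_RST2 value sets bit 3, which the old mask discarded.

    /* hint            value   (I & 0xC) >> 2   I & 0x3   (I & 0x10) >> 4
       _MM_HINT_T0         3                0         3                 0
       _MM_HINT_ET0        7                1         3                 0
       _MM_HINT_RST2       9                2         1                 0
       _MM_HINT_IT0       19                0         3                 1

       With the old 0x4 mask, _MM_HINT_RST2 decomposed exactly like
       _MM_HINT_T2 (a plain read prefetch with locality 1); masking with
       0xC forwards bit 3 to __builtin_ia32_prefetch.  */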
diff --git a/gcc/config/ia64/freebsd.h b/gcc/config/ia64/freebsd.h
index c1eb8d5..bc8366b 100644
--- a/gcc/config/ia64/freebsd.h
+++ b/gcc/config/ia64/freebsd.h
@@ -36,7 +36,7 @@ along with GCC; see the file COPYING3. If not see
/************************[ Target stuff ]***********************************/
-/* Define the actual types of some ANSI-mandated types.
+/* Define the actual types of some ANSI-mandated types.
Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.cc,
c-common.cc, and config/<arch>/<arch>.h. */
diff --git a/gcc/config/ia64/ia64.cc b/gcc/config/ia64/ia64.cc
index cd6ed89..4acbd82 100644
--- a/gcc/config/ia64/ia64.cc
+++ b/gcc/config/ia64/ia64.cc
@@ -352,7 +352,7 @@ struct expand_vec_perm_d
machine_mode vmode;
unsigned char nelt;
bool one_operand_p;
- bool testing_p;
+ bool testing_p;
};
static bool ia64_expand_vec_perm_const_1 (struct expand_vec_perm_d *d);
@@ -619,9 +619,6 @@ static const scoped_attribute_specs *const ia64_attribute_table[] =
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P ia64_legitimate_address_p
-#undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_false
-
#undef TARGET_CANNOT_FORCE_CONST_MEM
#define TARGET_CANNOT_FORCE_CONST_MEM ia64_cannot_force_const_mem
@@ -818,7 +815,7 @@ ia64_vms_common_object_attribute (tree *node, tree name, tree args,
tree id;
gcc_assert (DECL_P (decl));
-
+
DECL_COMMON (decl) = 1;
id = TREE_VALUE (args);
if (TREE_CODE (id) != IDENTIFIER_NODE && TREE_CODE (id) != STRING_CST)
@@ -1045,7 +1042,7 @@ ia64_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x,
return true;
else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == POST_DEC)
&& ia64_legitimate_address_reg (XEXP (x, 0), strict)
- && XEXP (x, 0) != arg_pointer_rtx)
+ && XEXP (x, 0) != arg_pointer_rtx)
return true;
else if (GET_CODE (x) == POST_MODIFY
&& ia64_legitimate_address_reg (XEXP (x, 0), strict)
@@ -1333,7 +1330,7 @@ ia64_expand_move (rtx op0, rtx op1)
{
machine_mode mode = GET_MODE (op0);
- if (!reload_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
+ if (!lra_in_progress && !reload_completed && !ia64_move_ok (op0, op1))
op1 = force_reg (mode, op1);
if ((mode == Pmode || mode == ptr_mode) && symbolic_operand (op1, VOIDmode))
@@ -1359,7 +1356,7 @@ ia64_expand_move (rtx op0, rtx op1)
else if (aligned_offset_symbol_operand (sym, mode))
{
HOST_WIDE_INT addend_lo, addend_hi;
-
+
addend_lo = ((addend & 0x3fff) ^ 0x2000) - 0x2000;
addend_hi = addend - addend_lo;
@@ -1444,7 +1441,7 @@ ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
case CONST_DOUBLE:
/* Cannot occur reversed. */
gcc_assert (!reversed);
-
+
if (GET_MODE (in) != TFmode)
split_double (in, &out[0], &out[1]);
else
@@ -1499,7 +1496,7 @@ ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
case POST_INC:
gcc_assert (!reversed && !dead);
-
+
/* Just do the increment in two steps. */
out[0] = adjust_automodify_address (in, DImode, 0, 0);
out[1] = adjust_automodify_address (in, DImode, 0, 8);
@@ -1507,7 +1504,7 @@ ia64_split_tmode (rtx out[2], rtx in, bool reversed, bool dead)
case POST_DEC:
gcc_assert (!reversed && !dead);
-
+
/* Add 8, subtract 24. */
base = XEXP (base, 0);
out[0] = adjust_automodify_address
@@ -1595,7 +1592,7 @@ ia64_split_tmode_move (rtx operands[])
the appropriate order so that the pointer is not destroyed too
early. Also we must not generate a postmodify for that second
load, or rws_access_regno will die. And we must not generate a
- postmodify for the second load if the destination register
+ postmodify for the second load if the destination register
overlaps with the base register. */
if (GET_CODE (operands[1]) == MEM
&& reg_overlap_mentioned_p (operands[0], operands[1]))
@@ -1780,7 +1777,7 @@ ia64_expand_movxf_movrf (machine_mode mode, rtx operands[])
}
}
- if (!reload_in_progress && !reload_completed)
+ if (!lra_in_progress && !reload_completed)
{
operands[1] = spill_xfmode_rfmode_operand (operands[1], 0, mode);
@@ -1841,7 +1838,7 @@ ia64_expand_compare (rtx *expr, rtx *op0, rtx *op1)
int magic;
enum rtx_code ncode;
rtx ret;
-
+
gcc_assert (cmptf_libfunc && GET_MODE (*op1) == TFmode);
switch (code)
{
@@ -2865,7 +2862,7 @@ ia64_compute_frame_size (HOST_WIDE_INT size)
if (df_regs_ever_live_p (AR_PFS_REGNUM))
{
SET_HARD_REG_BIT (mask, AR_PFS_REGNUM);
- current_frame_info.r[reg_save_ar_pfs]
+ current_frame_info.r[reg_save_ar_pfs]
= find_gr_spill (reg_save_ar_pfs, 1);
if (current_frame_info.r[reg_save_ar_pfs] == 0)
{
@@ -2880,8 +2877,8 @@ ia64_compute_frame_size (HOST_WIDE_INT size)
it is absolutely critical that FP get the only hard register that's
guaranteed to be free, so we allocated it first. If all three did
happen to be allocated hard regs, and are consecutive, rearrange them
- into the preferred order now.
-
+ into the preferred order now.
+
If we have already emitted code for any of those registers,
then it's already too late to change. */
min_regno = MIN (current_frame_info.r[reg_fp],
@@ -2935,7 +2932,7 @@ ia64_compute_frame_size (HOST_WIDE_INT size)
{
df_set_regs_ever_live (AR_UNAT_REGNUM, true);
SET_HARD_REG_BIT (mask, AR_UNAT_REGNUM);
- current_frame_info.r[reg_save_ar_unat]
+ current_frame_info.r[reg_save_ar_unat]
= find_gr_spill (reg_save_ar_unat, spill_size == 0);
if (current_frame_info.r[reg_save_ar_unat] == 0)
{
@@ -2947,7 +2944,7 @@ ia64_compute_frame_size (HOST_WIDE_INT size)
if (df_regs_ever_live_p (AR_LC_REGNUM))
{
SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
- current_frame_info.r[reg_save_ar_lc]
+ current_frame_info.r[reg_save_ar_lc]
= find_gr_spill (reg_save_ar_lc, spill_size == 0);
if (current_frame_info.r[reg_save_ar_lc] == 0)
{
@@ -3534,7 +3531,7 @@ ia64_expand_prologue (void)
ia64_emit_probe_stack_range (get_stack_check_protect (), size, bs_size);
}
- if (dump_file)
+ if (dump_file)
{
fprintf (dump_file, "ia64 frame related registers "
"recorded in current_frame_info.r[]:\n");
@@ -4148,7 +4145,7 @@ ia64_expand_epilogue (int sibcall_p)
names of r2 and HARD_FRAME_POINTER_REGNUM, so we have to make
sure we're using the string "r2" when emitting the register
name for the assembler. */
- if (current_frame_info.r[reg_fp]
+ if (current_frame_info.r[reg_fp]
&& current_frame_info.r[reg_fp] == GR_REG (2))
fp = HARD_FRAME_POINTER_REGNUM;
@@ -4275,7 +4272,7 @@ ia64_hard_regno_rename_ok (int from, int to)
unsigned int r;
for (r = reg_fp; r <= reg_save_ar_lc; r++)
- if (to == current_frame_info.r[r]
+ if (to == current_frame_info.r[r]
|| from == current_frame_info.r[r]
|| to == emitted_frame_related_regs[r]
|| from == emitted_frame_related_regs[r])
@@ -4884,7 +4881,7 @@ ia64_function_arg_1 (cumulative_args_t cum_v, const function_arg_info &arg,
}
return gen_rtx_PARALLEL (arg.mode, gen_rtvec_v (i, loc));
}
-
+
/* Integral and aggregates go in general registers. If we have run out of
FR registers, then FP values must also go in general registers. This can
happen when we have a SFmode HFA. */
@@ -5234,7 +5231,7 @@ ia64_function_value (const_tree valtype,
if (fn_decl_or_type
&& !DECL_P (fn_decl_or_type))
func = NULL;
-
+
mode = TYPE_MODE (valtype);
hfa_mode = hfa_element_mode (valtype, 0);
@@ -5880,7 +5877,7 @@ ia64_preferred_reload_class (rtx x, reg_class_t rclass)
of the f/f case when reloading (set (reg fX) (mem/v)). */
if (MEM_P (x) && MEM_VOLATILE_P (x))
return NO_REGS;
-
+
/* Force all unrecognized constants into the constant pool. */
if (CONSTANT_P (x))
return NO_REGS;
@@ -6490,7 +6487,7 @@ update_set_flags (rtx x, struct reg_flags *pflags)
doloop_end_internal,
(3) The destination is an fp register, in which case this is
an fselect instruction.
- (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
+ (4) The condition has (unspec [(reg)] UNSPEC_LDC), in which case
this is a check load.
In all cases, nothing we do in this function applies. */
return;
@@ -6542,12 +6539,12 @@ set_src_needs_barrier (rtx x, struct reg_flags flags, int pred)
}
if (ia64_spec_check_src_p (src))
- /* Avoid checking one register twice (in condition
+ /* Avoid checking one register twice (in condition
and in 'then' section) for ldc pattern. */
{
gcc_assert (REG_P (XEXP (src, 2)));
need_barrier = rtx_needs_barrier (XEXP (src, 2), flags, pred);
-
+
/* We process MEM below. */
src = XEXP (src, 1);
}
@@ -7438,7 +7435,7 @@ static void
ia64_sched_init_global (FILE *dump ATTRIBUTE_UNUSED,
int sched_verbose ATTRIBUTE_UNUSED,
int max_ready ATTRIBUTE_UNUSED)
-{
+{
gcc_assert (pending_data_specs == 0);
}
@@ -7643,7 +7640,7 @@ ia64_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
if (reload_completed)
{
int needed = group_barrier_needed (insn);
-
+
gcc_assert (!needed);
if (CALL_P (insn))
init_insn_group_barriers ();
@@ -7777,7 +7774,7 @@ ia64_dfa_new_cycle (FILE *dump, int verbose, rtx_insn *insn, int last_clock,
static void
ia64_h_i_d_extended (void)
{
- if (stops_p != NULL)
+ if (stops_p != NULL)
{
int new_clocks_length = get_max_uid () * 3 / 2;
stops_p = (char *) xrecalloc (stops_p, new_clocks_length, clocks_length, 1);
@@ -7864,7 +7861,7 @@ static void
ia64_clear_sched_context (void *_sc)
{
ia64_sched_context_t sc = (ia64_sched_context_t) _sc;
-
+
free (sc->prev_cycle_state);
sc->prev_cycle_state = NULL;
}
@@ -8043,13 +8040,13 @@ ia64_set_sched_flags (spec_info_t spec_info)
|| (mflag_sched_ar_in_data_spec && reload_completed)))
mask |= BE_IN_DATA;
}
-
+
if (mflag_sched_control_spec
&& (!sel_sched_p ()
|| reload_completed))
{
mask |= BEGIN_CONTROL;
-
+
if (!sel_sched_p () && mflag_sched_in_control_spec)
mask |= BE_IN_CONTROL;
}
@@ -8062,9 +8059,9 @@ ia64_set_sched_flags (spec_info_t spec_info)
if (mask & BE_IN_SPEC)
*flags |= NEW_BBS;
-
+
spec_info->flags = 0;
-
+
if ((mask & CONTROL_SPEC)
&& sel_sched_p () && mflag_sel_sched_dont_check_control_spec)
spec_info->flags |= SEL_SCHED_SPEC_DONT_CHECK_CONTROL;
@@ -8073,7 +8070,7 @@ ia64_set_sched_flags (spec_info_t spec_info)
spec_info->dump = sched_dump;
else
spec_info->dump = 0;
-
+
if (mflag_sched_count_spec_in_critical_path)
spec_info->flags |= COUNT_SPEC_IN_CRITICAL_PATH;
}
@@ -8320,10 +8317,10 @@ insn_can_be_in_speculative_p (rtx insn ATTRIBUTE_UNUSED,
return 0. */
static int
ia64_speculate_insn (rtx_insn *insn, ds_t ts, rtx *new_pat)
-{
+{
int mode_no;
int res;
-
+
gcc_assert (!(ts & ~SPECULATIVE));
if (ia64_spec_check_p (insn))
@@ -8510,12 +8507,12 @@ ia64_gen_spec_check (rtx_insn *insn, rtx_insn *label, ds_t ds)
gcc_assert (!ia64_needs_block_p (ds));
op1 = copy_rtx (recog_data.operand[1]);
}
-
+
gen_check = get_spec_check_gen_function (ds, mode_no, label == NULL_RTX,
true);
check_pat = gen_check (copy_rtx (recog_data.operand[0]), op1);
-
+
pat = PATTERN (insn);
if (GET_CODE (pat) == COND_EXEC)
check_pat = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (COND_EXEC_TEST (pat)),
@@ -8547,14 +8544,14 @@ ia64_spec_check_src_p (rtx src)
t = XEXP (src, 0);
if (GET_CODE (t) == NE)
{
- t = XEXP (t, 0);
+ t = XEXP (t, 0);
if (GET_CODE (t) == UNSPEC)
{
int code;
-
+
code = XINT (t, 1);
-
+
if (code == UNSPEC_LDCCLR
|| code == UNSPEC_LDCNC
|| code == UNSPEC_CHKACLR
@@ -9266,7 +9263,7 @@ bundling (FILE *dump, int verbose, rtx_insn *prev_head_insn, rtx_insn *tail)
INSN_UID (insn));
}
}
-
+
/* We should find a solution because the 2nd insn scheduling has
found one. */
gcc_assert (index_to_bundle_states [insn_num]);
@@ -9646,7 +9643,7 @@ ia64_st_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
if (GET_CODE (reg) == SUBREG)
reg = SUBREG_REG (reg);
gcc_assert (GET_CODE (reg) == REG);
-
+
dest = ia64_single_set (consumer);
gcc_assert (dest);
mem = SET_DEST (dest);
@@ -9670,12 +9667,12 @@ ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
if (GET_CODE (reg) == SUBREG)
reg = SUBREG_REG (reg);
gcc_assert (GET_CODE (reg) == REG);
-
+
src = ia64_single_set (consumer);
gcc_assert (src);
mem = SET_SRC (src);
gcc_assert (mem);
-
+
if (GET_CODE (mem) == UNSPEC && XVECLEN (mem, 0) > 0)
mem = XVECEXP (mem, 0, 0);
else if (GET_CODE (mem) == IF_THEN_ELSE)
@@ -9684,7 +9681,7 @@ ia64_ld_address_bypass_p (rtx_insn *producer, rtx_insn *consumer)
gcc_assert (XINT (XEXP (XEXP (mem, 0), 0), 1) == UNSPEC_LDCCLR);
mem = XEXP (mem, 1);
}
-
+
while (GET_CODE (mem) == SUBREG || GET_CODE (mem) == ZERO_EXTEND)
mem = XEXP (mem, 0);
@@ -9892,7 +9889,7 @@ ia64_reorg (void)
emit_all_insn_group_barriers (dump_file);
df_analyze ();
-
+
/* A call must not be the last instruction in a function, so that the
return address is still within the function, so that unwinding works
properly. Note that IA-64 differs from dwarf2 on this point. */
@@ -10092,9 +10089,9 @@ process_cfa_adjust_cfa (FILE *out_file, rtx pat, rtx insn,
{
rtx op0 = XEXP (src, 0);
rtx op1 = XEXP (src, 1);
-
+
gcc_assert (op0 == dest && GET_CODE (op1) == CONST_INT);
-
+
if (INTVAL (op1) < 0)
{
gcc_assert (!frame_pointer_needed);
@@ -10922,7 +10919,7 @@ ia64_struct_retval_addr_is_first_parm_p (tree fntype)
these return values. */
return (abi_version_at_least (2)
&& ret_type
- && TYPE_MODE (ret_type) == BLKmode
+ && TYPE_MODE (ret_type) == BLKmode
&& TREE_ADDRESSABLE (ret_type)
&& lang_GNU_CXX ());
}
@@ -11514,7 +11511,7 @@ expand_vec_perm_shrp (struct expand_vec_perm_d *d)
static bool
expand_vec_perm_1 (struct expand_vec_perm_d *d)
-{
+{
unsigned i, nelt = d->nelt;
unsigned char perm2[MAX_VECT_LEN];
@@ -11551,8 +11548,8 @@ expand_vec_perm_1 (struct expand_vec_perm_d *d)
if (expand_vec_perm_shrp (d))
return true;
- /* ??? Look for deposit-like permutations where most of the result
- comes from one vector unchanged and the rest comes from a
+ /* ??? Look for deposit-like permutations where most of the result
+ comes from one vector unchanged and the rest comes from a
sequential hunk of the other vector. */
return false;
@@ -11639,7 +11636,7 @@ expand_vec_perm_interleave_2 (struct expand_vec_perm_d *d)
h1 = h0 << nelt2;
h2 = h0 << nelt;
h3 = h0 << (nelt + nelt2);
-
+
if ((contents & (h0 | h2)) == contents) /* punpck even halves */
{
for (i = 0; i < nelt; ++i)
@@ -11904,7 +11901,7 @@ ia64_expand_vec_setv2sf (rtx operands[3])
struct expand_vec_perm_d d;
unsigned int which;
bool ok;
-
+
d.target = operands[0];
d.op0 = operands[0];
d.op1 = gen_reg_rtx (V2SFmode);
diff --git a/gcc/config/ia64/ia64.md b/gcc/config/ia64/ia64.md
index 698e302..d485acc 100644
--- a/gcc/config/ia64/ia64.md
+++ b/gcc/config/ia64/ia64.md
@@ -2318,7 +2318,7 @@
(match_operand:DI 3 "register_operand" "f"))
(match_operand:DI 4 "nonmemory_operand" "rI")))
(clobber (match_scratch:DI 5 "=f"))]
- "reload_in_progress"
+ "lra_in_progress"
"#"
[(set_attr "itanium_class" "unknown")])
@@ -3407,7 +3407,7 @@
(match_operand:DI 2 "shladd_operand" "n"))
(match_operand:DI 3 "nonmemory_operand" "r"))
(match_operand:DI 4 "nonmemory_operand" "rI")))]
- "reload_in_progress"
+ "lra_in_progress"
"* gcc_unreachable ();"
"reload_completed"
[(set (match_dup 0) (plus:DI (mult:DI (match_dup 1) (match_dup 2))
diff --git a/gcc/config/ia64/predicates.md b/gcc/config/ia64/predicates.md
index 01a4eff..85f5380e 100644
--- a/gcc/config/ia64/predicates.md
+++ b/gcc/config/ia64/predicates.md
@@ -347,7 +347,7 @@
allows reload the opportunity to avoid spilling addresses to
the stack, and instead simply substitute in the value from a
REG_EQUIV. We'll split this up again when splitting the insn. */
- if (reload_in_progress || reload_completed)
+ if (lra_in_progress || reload_completed)
return true;
/* Some symbol types we allow to use with any offset. */
diff --git a/gcc/config/iq2000/iq2000.cc b/gcc/config/iq2000/iq2000.cc
index 136675d..42935d3 100644
--- a/gcc/config/iq2000/iq2000.cc
+++ b/gcc/config/iq2000/iq2000.cc
@@ -1572,7 +1572,7 @@ final_prescan_insn (rtx_insn *insn, rtx opvec[] ATTRIBUTE_UNUSED,
rtx_insn *nop_insn = emit_insn_after (gen_nop (), insn);
INSN_ADDRESSES_NEW (nop_insn, -1);
}
-
+
if (TARGET_STATS
&& (JUMP_P (insn) || CALL_P (insn)))
dslots_jump_total ++;
@@ -1684,7 +1684,7 @@ compute_frame_size (HOST_WIDE_INT size)
gp_reg_rounded = IQ2000_STACK_ALIGN (gp_reg_size);
total_size += gp_reg_rounded + IQ2000_STACK_ALIGN (fp_reg_size);
- /* The gp reg is caller saved, so there is no need for leaf routines
+ /* The gp reg is caller saved, so there is no need for leaf routines
(total_size == extra_size) to save the gp reg. */
if (total_size == extra_size
&& ! profile_flag)
@@ -1751,18 +1751,18 @@ iq2000_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED)
{
int offset;
- compute_frame_size (get_frame_size ());
- if ((from) == FRAME_POINTER_REGNUM)
- (offset) = 0;
- else if ((from) == ARG_POINTER_REGNUM)
- (offset) = (cfun->machine->total_size);
- else if ((from) == RETURN_ADDRESS_POINTER_REGNUM)
+ compute_frame_size (get_frame_size ());
+ if ((from) == FRAME_POINTER_REGNUM)
+ (offset) = 0;
+ else if ((from) == ARG_POINTER_REGNUM)
+ (offset) = (cfun->machine->total_size);
+ else if ((from) == RETURN_ADDRESS_POINTER_REGNUM)
{
- if (leaf_function_p ())
- (offset) = 0;
- else (offset) = cfun->machine->gp_sp_offset
- + ((UNITS_PER_WORD - (POINTER_SIZE / BITS_PER_UNIT))
- * (BYTES_BIG_ENDIAN != 0));
+ if (leaf_function_p ())
+ (offset) = 0;
+ else (offset) = cfun->machine->gp_sp_offset
+ + ((UNITS_PER_WORD - (POINTER_SIZE / BITS_PER_UNIT))
+ * (BYTES_BIG_ENDIAN != 0));
}
else
gcc_unreachable ();
@@ -1771,7 +1771,7 @@ iq2000_initial_elimination_offset (int from, int to ATTRIBUTE_UNUSED)
}
/* Common code to emit the insns (or to write the instructions to a file)
- to save/restore registers.
+ to save/restore registers.
Other parts of the code assume that IQ2000_TEMP1_REGNUM (aka large_reg)
is not modified within save_restore_insns. */
@@ -1891,7 +1891,7 @@ save_restore_insns (int store_p)
if (store_p)
iq2000_emit_frame_related_store (mem_rtx, reg_rtx, gp_offset);
- else
+ else
{
emit_move_insn (reg_rtx, mem_rtx);
}
@@ -2632,7 +2632,7 @@ expand_one_builtin (enum insn_code icode, rtx target, tree exp,
default:
gcc_unreachable ();
}
-
+
if (! pat)
return 0;
emit_insn (pat);
@@ -2663,7 +2663,7 @@ iq2000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
{
default:
break;
-
+
case IQ2000_BUILTIN_ADO16:
return expand_one_builtin (CODE_FOR_ado16, target, exp, code, 2);
@@ -2672,10 +2672,10 @@ iq2000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
code[2] = CONST_INT;
code[3] = CONST_INT;
return expand_one_builtin (CODE_FOR_ram, target, exp, code, 4);
-
+
case IQ2000_BUILTIN_CHKHDR:
return expand_one_builtin (CODE_FOR_chkhdr, target, exp, code, 2);
-
+
case IQ2000_BUILTIN_PKRL:
return expand_one_builtin (CODE_FOR_pkrl, target, exp, code, 2);
@@ -2822,7 +2822,7 @@ iq2000_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
case IQ2000_BUILTIN_SYSCALL:
return expand_one_builtin (CODE_FOR_syscall, target, exp, code, 0);
}
-
+
return NULL_RTX;
}
@@ -2843,39 +2843,39 @@ iq2000_setup_incoming_varargs (cumulative_args_t cum_v,
int *pretend_size, int no_rtl)
{
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
- unsigned int iq2000_off = ! cum->last_arg_fp;
- unsigned int iq2000_fp_off = cum->last_arg_fp;
+ unsigned int iq2000_off = ! cum->last_arg_fp;
+ unsigned int iq2000_fp_off = cum->last_arg_fp;
if ((cum->arg_words < MAX_ARGS_IN_REGISTERS - iq2000_off))
{
- int iq2000_save_gp_regs
- = MAX_ARGS_IN_REGISTERS - cum->arg_words - iq2000_off;
- int iq2000_save_fp_regs
- = (MAX_ARGS_IN_REGISTERS - cum->fp_arg_words - iq2000_fp_off);
+ int iq2000_save_gp_regs
+ = MAX_ARGS_IN_REGISTERS - cum->arg_words - iq2000_off;
+ int iq2000_save_fp_regs
+ = (MAX_ARGS_IN_REGISTERS - cum->fp_arg_words - iq2000_fp_off);
- if (iq2000_save_gp_regs < 0)
- iq2000_save_gp_regs = 0;
- if (iq2000_save_fp_regs < 0)
- iq2000_save_fp_regs = 0;
+ if (iq2000_save_gp_regs < 0)
+ iq2000_save_gp_regs = 0;
+ if (iq2000_save_fp_regs < 0)
+ iq2000_save_fp_regs = 0;
- *pretend_size = ((iq2000_save_gp_regs * UNITS_PER_WORD)
- + (iq2000_save_fp_regs * UNITS_PER_FPREG));
+ *pretend_size = ((iq2000_save_gp_regs * UNITS_PER_WORD)
+ + (iq2000_save_fp_regs * UNITS_PER_FPREG));
- if (! (no_rtl))
+ if (! (no_rtl))
{
- if (cum->arg_words < MAX_ARGS_IN_REGISTERS - iq2000_off)
+ if (cum->arg_words < MAX_ARGS_IN_REGISTERS - iq2000_off)
{
- rtx ptr, mem;
+ rtx ptr, mem;
ptr = plus_constant (Pmode, virtual_incoming_args_rtx,
- (iq2000_save_gp_regs
* UNITS_PER_WORD));
- mem = gen_rtx_MEM (BLKmode, ptr);
- move_block_from_reg
- (cum->arg_words + GP_ARG_FIRST + iq2000_off,
- mem,
+ mem = gen_rtx_MEM (BLKmode, ptr);
+ move_block_from_reg
+ (cum->arg_words + GP_ARG_FIRST + iq2000_off,
+ mem,
iq2000_save_gp_regs);
- }
- }
+ }
+ }
}
}
@@ -3297,7 +3297,7 @@ iq2000_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
* total = COSTS_N_INSNS (2 * num_words);
break;
}
-
+
case FFS:
* total = COSTS_N_INSNS (6);
break;
@@ -3316,7 +3316,7 @@ iq2000_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
* total = COSTS_N_INSNS ((GET_CODE (XEXP (x, 1)) == CONST_INT) ? 4 : 12);
else
* total = COSTS_N_INSNS (1);
- break;
+ break;
case ABS:
if (mode == SFmode || mode == DFmode)
@@ -3324,7 +3324,7 @@ iq2000_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
else
* total = COSTS_N_INSNS (4);
break;
-
+
case PLUS:
case MINUS:
if (mode == SFmode || mode == DFmode)
@@ -3334,7 +3334,7 @@ iq2000_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
else
* total = COSTS_N_INSNS (1);
break;
-
+
case NEG:
* total = (mode == DImode) ? 4 : 1;
break;
@@ -3357,16 +3357,16 @@ iq2000_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
else
* total = COSTS_N_INSNS (69);
break;
-
+
case UDIV:
case UMOD:
* total = COSTS_N_INSNS (69);
break;
-
+
case SIGN_EXTEND:
* total = COSTS_N_INSNS (2);
break;
-
+
case ZERO_EXTEND:
* total = COSTS_N_INSNS (1);
break;
@@ -3374,7 +3374,7 @@ iq2000_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
case CONST_INT:
* total = 0;
break;
-
+
case LABEL_REF:
* total = COSTS_N_INSNS (2);
break;
@@ -3399,19 +3399,19 @@ iq2000_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
case SYMBOL_REF:
* total = COSTS_N_INSNS (SYMBOL_REF_FLAG (x) ? 1 : 2);
break;
-
+
case CONST_DOUBLE:
{
rtx high, low;
-
+
split_double (x, & high, & low);
-
+
* total = COSTS_N_INSNS ( (high == CONST0_RTX (GET_MODE (high))
|| low == CONST0_RTX (GET_MODE (low)))
? 2 : 4);
break;
}
-
+
default:
return false;
}
diff --git a/gcc/config/iq2000/iq2000.h b/gcc/config/iq2000/iq2000.h
index c3562be..08801ce 100644
--- a/gcc/config/iq2000/iq2000.h
+++ b/gcc/config/iq2000/iq2000.h
@@ -1,4 +1,4 @@
-/* Definitions of target machine for GNU compiler.
+/* Definitions of target machine for GNU compiler.
Vitesse IQ2000 processors
Copyright (C) 2003-2024 Free Software Foundation, Inc.
@@ -60,7 +60,7 @@
/* Storage Layout. */
#define BITS_BIG_ENDIAN 0
-#define BYTES_BIG_ENDIAN 1
+#define BYTES_BIG_ENDIAN 1
#define WORDS_BIG_ENDIAN 1
#define BITS_PER_WORD 32
#define MAX_BITS_PER_WORD 64
@@ -295,7 +295,7 @@ typedef struct iq2000_args
init_cumulative_args (& CUM, FNTYPE, LIBNAME) \
#define FUNCTION_ARG_REGNO_P(N) \
- (((N) >= GP_ARG_FIRST && (N) <= GP_ARG_LAST))
+ (((N) >= GP_ARG_FIRST && (N) <= GP_ARG_LAST))
/* On the IQ2000, R2 and R3 are the only register thus used. */
diff --git a/gcc/config/kopensolaris-gnu.h b/gcc/config/kopensolaris-gnu.h
index e7f6198..880aa27 100644
--- a/gcc/config/kopensolaris-gnu.h
+++ b/gcc/config/kopensolaris-gnu.h
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#undef GNU_USER_TARGET_OS_CPP_BUILTINS
+#undef GNU_USER_TARGET_OS_CPP_BUILTINS
#define GNU_USER_TARGET_OS_CPP_BUILTINS() \
do \
{ \
diff --git a/gcc/config/lm32/lm32-protos.h b/gcc/config/lm32/lm32-protos.h
index fed0de6..861a406 100644
--- a/gcc/config/lm32/lm32-protos.h
+++ b/gcc/config/lm32/lm32-protos.h
@@ -20,13 +20,13 @@
<http://www.gnu.org/licenses/>. */
extern int lm32_return_in_memory (tree type);
-extern void lm32_declare_object (FILE *stream, char *name, char *init_string,
+extern void lm32_declare_object (FILE *stream, char *name, char *init_string,
char *final_string, int size);
extern void lm32_expand_prologue (void);
extern void lm32_expand_epilogue (void);
extern void lm32_print_operand (FILE *file, rtx op, int letter);
extern void lm32_print_operand_address (FILE *file, rtx addr);
-extern HOST_WIDE_INT lm32_compute_initial_elimination_offset (int from,
+extern HOST_WIDE_INT lm32_compute_initial_elimination_offset (int from,
int to);
extern int lm32_can_use_return (void);
extern rtx lm32_return_addr_rtx (int count, rtx frame);
diff --git a/gcc/config/lm32/lm32.cc b/gcc/config/lm32/lm32.cc
index 594f733..206b3f8 100644
--- a/gcc/config/lm32/lm32.cc
+++ b/gcc/config/lm32/lm32.cc
@@ -158,20 +158,20 @@ emit_add (rtx dest, rtx src0, rtx src1)
}
/* Generate the code to compare (and possibly branch) two integer values
- TEST_CODE is the comparison code we are trying to emulate
+ TEST_CODE is the comparison code we are trying to emulate
(or implement directly)
- RESULT is where to store the result of the comparison,
+ RESULT is where to store the result of the comparison,
or null to emit a branch
CMP0 CMP1 are the two comparison operands
DESTINATION is the destination of the branch, or null to only compare
*/
static void
-gen_int_relational (enum rtx_code code,
- rtx result,
- rtx cmp0,
- rtx cmp1,
- rtx destination)
+gen_int_relational (enum rtx_code code,
+ rtx result,
+ rtx cmp0,
+ rtx cmp1,
+ rtx destination)
{
machine_mode mode;
int branch_p;
@@ -183,7 +183,7 @@ gen_int_relational (enum rtx_code code,
/* Is this a branch or compare. */
branch_p = (destination != 0);
- /* Instruction set doesn't support LE or LT, so swap operands and use
+ /* Instruction set doesn't support LE or LT, so swap operands and use
GE, GT. */
switch (code)
{
@@ -270,7 +270,7 @@ lm32_expand_scc (rtx operands[])
rtx op0 = operands[2];
rtx op1 = operands[3];
- gen_int_relational (code, target, op0, op1, NULL_RTX);
+ gen_int_relational (code, target, op0, op1, NULL_RTX);
}
/* Compare OPERANDS[1] with OPERANDS[2] using comparison code
@@ -284,7 +284,7 @@ lm32_expand_conditional_branch (rtx operands[])
rtx op1 = operands[2];
rtx destination = operands[3];
- gen_int_relational (code, NULL_RTX, op0, op1, destination);
+ gen_int_relational (code, NULL_RTX, op0, op1, destination);
}
/* Generate and emit RTL to save or restore callee save registers. */
@@ -304,10 +304,10 @@ expand_save_restore (struct lm32_frame_info *info, int op)
{
rtx offset_rtx;
rtx mem;
-
+
offset_rtx = GEN_INT (offset);
if (satisfies_constraint_K (offset_rtx))
- {
+ {
mem = gen_rtx_MEM (word_mode,
gen_rtx_PLUS (Pmode,
stack_pointer_rtx,
@@ -316,23 +316,23 @@ expand_save_restore (struct lm32_frame_info *info, int op)
else
{
/* r10 is caller saved so it can be used as a temp reg. */
- rtx r10;
-
+ rtx r10;
+
r10 = gen_rtx_REG (word_mode, 10);
insn = emit_move_insn (r10, offset_rtx);
if (op == 0)
RTX_FRAME_RELATED_P (insn) = 1;
insn = emit_add (r10, r10, stack_pointer_rtx);
if (op == 0)
- RTX_FRAME_RELATED_P (insn) = 1;
+ RTX_FRAME_RELATED_P (insn) = 1;
mem = gen_rtx_MEM (word_mode, r10);
- }
-
+ }
+
if (op == 0)
insn = emit_move_insn (mem, gen_rtx_REG (word_mode, regno));
else
insn = emit_move_insn (gen_rtx_REG (word_mode, regno), mem);
-
+
/* only prologue instructions which set the sp fp or save a
register should be marked as frame related. */
if (op == 0)
@@ -391,11 +391,11 @@ lm32_expand_prologue (void)
{
/* Move sp to fp. */
insn = emit_move_insn (frame_pointer_rtx, stack_pointer_rtx);
- RTX_FRAME_RELATED_P (insn) = 1;
+ RTX_FRAME_RELATED_P (insn) = 1;
- /* Add offset - Don't use total_size, as that includes pretend_size,
+ /* Add offset - Don't use total_size, as that includes pretend_size,
which isn't part of this frame? */
- insn = emit_add (frame_pointer_rtx,
+ insn = emit_add (frame_pointer_rtx,
frame_pointer_rtx,
GEN_INT (current_frame_info.args_size +
current_frame_info.callee_size +
@@ -513,7 +513,7 @@ lm32_print_operand (FILE * file, rtx op, int letter)
fprintf (file, "%s", reg_names[regnum]);
}
else if (code == HIGH)
- output_addr_const (file, XEXP (op, 0));
+ output_addr_const (file, XEXP (op, 0));
else if (code == MEM)
output_address (GET_MODE (op), XEXP (op, 0));
else if (letter == 'z' && GET_CODE (op) == CONST_INT && INTVAL (op) == 0)
@@ -1129,7 +1129,7 @@ lm32_rtx_costs (rtx x, machine_mode mode, int outer_code,
*total = COSTS_N_INSNS (2);
return true;
}
- /* Fall through. */
+ /* Fall through. */
default:
if (satisfies_constraint_K (x))
@@ -1194,32 +1194,32 @@ lm32_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
static bool
lm32_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x,
bool strict, code_helper)
-{
- /* (rM) */
+{
+ /* (rM) */
if (strict && REG_P (x) && STRICT_REG_OK_FOR_BASE_P (x))
return true;
if (!strict && REG_P (x) && NONSTRICT_REG_OK_FOR_BASE_P (x))
return true;
-
- /* (rM)+literal) */
- if (GET_CODE (x) == PLUS
- && REG_P (XEXP (x, 0))
+
+ /* (rM)+literal) */
+ if (GET_CODE (x) == PLUS
+ && REG_P (XEXP (x, 0))
&& ((strict && STRICT_REG_OK_FOR_BASE_P (XEXP (x, 0)))
- || (!strict && NONSTRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))))
- && GET_CODE (XEXP (x, 1)) == CONST_INT
+ || (!strict && NONSTRICT_REG_OK_FOR_BASE_P (XEXP (x, 0))))
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
&& satisfies_constraint_K (XEXP ((x), 1)))
return true;
-
- /* gp(sym) */
- if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
+
+ /* gp(sym) */
+ if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_SMALL_P (x))
return true;
-
- return false;
+
+ return false;
}
-/* Check a move is not memory to memory. */
+/* Check a move is not memory to memory. */
-bool
+bool
lm32_move_ok (machine_mode mode, rtx operands[2]) {
if (memory_operand (operands[0], mode))
return register_or_zero_operand (operands[1], mode);
diff --git a/gcc/config/lm32/lm32.h b/gcc/config/lm32/lm32.h
index e761e14..ecad4cd 100644
--- a/gcc/config/lm32/lm32.h
+++ b/gcc/config/lm32/lm32.h
@@ -52,7 +52,7 @@
%{muser-enabled} \
"
-/* Let link script define all link options.
+/* Let link script define all link options.
Default to using simulator link script. */
#undef STARTFILE_SPEC
@@ -162,7 +162,7 @@ do { \
enum reg_class
{
- NO_REGS,
+ NO_REGS,
GENERAL_REGS,
ALL_REGS,
LIM_REG_CLASSES
diff --git a/gcc/config/loongarch/genopts/loongarch.opt.in b/gcc/config/loongarch/genopts/loongarch.opt.in
index d00950c..d5bbf01 100644
--- a/gcc/config/loongarch/genopts/loongarch.opt.in
+++ b/gcc/config/loongarch/genopts/loongarch.opt.in
@@ -301,3 +301,7 @@ default value is 4.
; CPUCFG independently, so we use bit flags to specify them.
TargetVariable
HOST_WIDE_INT la_isa_evolution = 0
+
+mannotate-tablejump
+Target Mask(ANNOTATE_TABLEJUMP) Save
+Annotate table jump instruction (jr {reg}) to correlate it with the jump table.
diff --git a/gcc/config/loongarch/loongarch-builtins.cc b/gcc/config/loongarch/loongarch-builtins.cc
index cf92770..64529da 100644
--- a/gcc/config/loongarch/loongarch-builtins.cc
+++ b/gcc/config/loongarch/loongarch-builtins.cc
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/loongarch/loongarch-c.cc b/gcc/config/loongarch/loongarch-c.cc
index c676364..97e1baf 100644
--- a/gcc/config/loongarch/loongarch-c.cc
+++ b/gcc/config/loongarch/loongarch-c.cc
@@ -116,7 +116,7 @@ loongarch_cpu_cpp_builtins (cpp_reader *pfile)
max_v_major = major > max_v_major ? major : max_v_major;
max_v_minor = major == max_v_major
- ? (minor > max_v_minor ? minor : max_v_minor): max_v_minor;
+ ? (minor > max_v_minor ? minor : max_v_minor) : max_v_minor;
}
/* Find the minimum ISA version required to run the target program. */
diff --git a/gcc/config/loongarch/loongarch-cpu.cc b/gcc/config/loongarch/loongarch-cpu.cc
index 8564ebb..343751b 100644
--- a/gcc/config/loongarch/loongarch-cpu.cc
+++ b/gcc/config/loongarch/loongarch-cpu.cc
@@ -265,11 +265,11 @@ fill_native_cpu_config (struct loongarch_target *tgt)
l1u_present |= cpucfg_cache[16] & 3; /* bit[1:0]: unified l1 */
l1d_present |= cpucfg_cache[16] & 4; /* bit[2:2]: l1d */
l1_szword = l1d_present ? 18 : (l1u_present ? 17 : 0);
- l1_szword = l1_szword ? cpucfg_cache[l1_szword]: 0;
+ l1_szword = l1_szword ? cpucfg_cache[l1_szword] : 0;
l2d_present |= cpucfg_cache[16] & 24; /* bit[4:3]: unified l2 */
l2d_present |= cpucfg_cache[16] & 128; /* bit[7:7]: l2d */
- l2_szword = l2d_present ? cpucfg_cache[19]: 0;
+ l2_szword = l2d_present ? cpucfg_cache[19] : 0;
native_cache.l1d_line_size
= 1 << ((l1_szword & 0x7f000000) >> 24); /* bit[30:24]: log2(line) */
diff --git a/gcc/config/loongarch/loongarch.cc b/gcc/config/loongarch/loongarch.cc
index c7a0210..f956ee4 100644
--- a/gcc/config/loongarch/loongarch.cc
+++ b/gcc/config/loongarch/loongarch.cc
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/loongarch/loongarch.md b/gcc/config/loongarch/loongarch.md
index f70ca85..bd08250 100644
--- a/gcc/config/loongarch/loongarch.md
+++ b/gcc/config/loongarch/loongarch.md
@@ -3496,12 +3496,22 @@
DONE;
})
+(define_mode_attr mode_size [(DI "8") (SI "4")])
+
(define_insn "@tablejump<mode>"
[(set (pc)
(match_operand:P 0 "register_operand" "e"))
(use (label_ref (match_operand 1 "" "")))]
""
- "jr\t%0"
+ {
+ return TARGET_ANNOTATE_TABLEJUMP
+ ? "1:jr\t%0\n\t"
+ ".pushsection\t.discard.tablejump_annotate\n\t"
+ "\t.<mode_size>byte\t1b\n\t"
+ "\t.<mode_size>byte\t%1\n\t"
+ ".popsection"
+ : "jr\t%0";
+ }
[(set_attr "type" "jump")
(set_attr "mode" "none")])
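A sketch (not taken from the patch) of what the annotated DImode form expands to, assuming a hypothetical jump register $r12 and jump table label .L5; <mode_size> selects .8byte for DI and .4byte for SI:

    /* With -mannotate-tablejump the pattern emits roughly:

         1:   jr      $r12
              .pushsection  .discard.tablejump_annotate
              .8byte  1b
              .8byte  .L5
              .popsection

       pairing the address of each indirect jump with the address of its
       jump table so that external tools can correlate the two; without
       the option it emits just "jr $r12".  */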
diff --git a/gcc/config/loongarch/loongarch.opt b/gcc/config/loongarch/loongarch.opt
index 91cb523..fae5754 100644
--- a/gcc/config/loongarch/loongarch.opt
+++ b/gcc/config/loongarch/loongarch.opt
@@ -310,6 +310,10 @@ default value is 4.
TargetVariable
HOST_WIDE_INT la_isa_evolution = 0
+mannotate-tablejump
+Target Mask(ANNOTATE_TABLEJUMP) Save
+Annotate table jump instruction (jr {reg}) to correlate it with the jump table.
+
mfrecipe
Target Mask(ISA_FRECIPE) Var(la_isa_evolution)
Support frecipe.{s/d} and frsqrte.{s/d} instructions.
diff --git a/gcc/config/loongarch/loongarch.opt.urls b/gcc/config/loongarch/loongarch.opt.urls
index f7545f6..571c504 100644
--- a/gcc/config/loongarch/loongarch.opt.urls
+++ b/gcc/config/loongarch/loongarch.opt.urls
@@ -72,6 +72,9 @@ UrlSuffix(gcc/LoongArch-Options.html#index-mpass-mrelax-to-as)
mtls-dialect=
UrlSuffix(gcc/LoongArch-Options.html#index-mtls-dialect-1)
+mannotate-tablejump
+UrlSuffix(gcc/LoongArch-Options.html#index-mannotate-tablejump)
+
mfrecipe
UrlSuffix(gcc/LoongArch-Options.html#index-mfrecipe)
diff --git a/gcc/config/m32c/m32c.cc b/gcc/config/m32c/m32c.cc
index 38abf17..d27538e 100644
--- a/gcc/config/m32c/m32c.cc
+++ b/gcc/config/m32c/m32c.cc
@@ -873,7 +873,7 @@ m32c_matches_constraint_p (rtx value, int constraint)
&& A0_OR_PSEUDO (patternr[5])
&& GET_MODE (patternr[5]) == HImode)
|| RTX_IS ("ms")));
- case CONSTRAINT_Sd:
+ case CONSTRAINT_Sd:
{
/* This is the common "src/dest" address */
rtx r;
@@ -2790,7 +2790,7 @@ m32c_print_operand (FILE * file, rtx x, int code)
#undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P m32c_print_operand_punct_valid_p
-static bool
+static bool
m32c_print_operand_punct_valid_p (unsigned char c)
{
if (c == '&' || c == '!')
@@ -3036,7 +3036,7 @@ m32c_insert_attributes (tree node ATTRIBUTE_UNUSED,
{
TREE_THIS_VOLATILE (node) = true;
}
- }
+ }
}
/* Hash table of pragma info. */
@@ -3227,7 +3227,7 @@ m32c_immd_dbl_mov (rtx * operands ATTRIBUTE_UNUSED,
/* ??? This relied on the now-defunct MEM_SCALAR and MEM_IN_STRUCT_P
flags. */
return false;
-}
+}
/* Expanders */
@@ -4099,7 +4099,7 @@ m32c_emit_prologue (void)
if (flag_stack_usage_info)
current_function_static_stack_size = frame_size;
-
+
if (frame_size > 254)
{
extra_frame_size = frame_size - 254;
diff --git a/gcc/config/m32r/m32r.cc b/gcc/config/m32r/m32r.cc
index c45a7d6..4742ceb 100644
--- a/gcc/config/m32r/m32r.cc
+++ b/gcc/config/m32r/m32r.cc
@@ -307,7 +307,7 @@ init_reg_tables (void)
for (i = 0; i < NUM_MACHINE_MODES; i++)
{
machine_mode m = (machine_mode) i;
-
+
switch (GET_MODE_CLASS (m))
{
case MODE_INT:
diff --git a/gcc/config/m32r/m32r.h b/gcc/config/m32r/m32r.h
index 7be8dfd..06d5d04 100644
--- a/gcc/config/m32r/m32r.h
+++ b/gcc/config/m32r/m32r.h
@@ -222,7 +222,7 @@
#define UNITS_PER_WORD 4
/* Define this macro if it is advisable to hold scalars in registers
- in a wider mode than that declared by the program. In such cases,
+ in a wider mode than that declared by the program. In such cases,
the value is constrained to be within the bounds of the declared
type, but kept valid in the wider mode. The signedness of the
extension may differ from that of the type. */
@@ -303,7 +303,7 @@
#endif
#define FIRST_PSEUDO_REGISTER (M32R_NUM_REGISTERS + SUBTARGET_NUM_REGISTERS)
-
+
/* 1 for registers that have pervasive standard uses
and are not available for the register allocator.
diff --git a/gcc/config/m68k/linux.h b/gcc/config/m68k/linux.h
index b711f49..fad360f 100644
--- a/gcc/config/m68k/linux.h
+++ b/gcc/config/m68k/linux.h
@@ -90,7 +90,7 @@ along with GCC; see the file COPYING3. If not see
/* Currently, JUMP_TABLES_IN_TEXT_SECTION must be defined in order to
keep switch tables in the text section. */
-
+
#define JUMP_TABLES_IN_TEXT_SECTION 1
/* Use the default action for outputting the case label. */
diff --git a/gcc/config/m68k/m68k.cc b/gcc/config/m68k/m68k.cc
index 21c9498..d642bcb 100644
--- a/gcc/config/m68k/m68k.cc
+++ b/gcc/config/m68k/m68k.cc
@@ -772,7 +772,7 @@ m68k_get_function_kind (tree func)
tree a;
gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
-
+
a = lookup_attribute ("interrupt", DECL_ATTRIBUTES (func));
if (a != NULL_TREE)
return m68k_fk_interrupt_handler;
@@ -1400,7 +1400,7 @@ static bool
m68k_ok_for_sibcall_p (tree decl, tree exp)
{
enum m68k_function_kind kind;
-
+
/* We cannot use sibcalls for nested functions because we use the
static chain register for indirect calls. */
if (CALL_EXPR_STATIC_CHAIN (exp))
@@ -1436,7 +1436,7 @@ m68k_ok_for_sibcall_p (tree decl, tree exp)
the same. */
if (decl && m68k_get_function_kind (decl) == kind)
return true;
-
+
return false;
}
@@ -1503,12 +1503,14 @@ m68k_legitimize_address (rtx x, rtx oldx, machine_mode mode)
#define COPY_ONCE(Y) if (!copied) { Y = copy_rtx (Y); copied = ch = 1; }
- if (GET_CODE (XEXP (x, 0)) == MULT)
+ if (GET_CODE (XEXP (x, 0)) == MULT
+ || GET_CODE (XEXP (x, 0)) == ASHIFT)
{
COPY_ONCE (x);
XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
}
- if (GET_CODE (XEXP (x, 1)) == MULT)
+ if (GET_CODE (XEXP (x, 1)) == MULT
+ || GET_CODE (XEXP (x, 1)) == ASHIFT)
{
COPY_ONCE (x);
XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
@@ -1731,7 +1733,7 @@ m68k_asm_final_postscan_insn (FILE *, rtx_insn *insn, rtx [], int)
return;
}
-/* Output a dbCC; jCC sequence. Note we do not handle the
+/* Output a dbCC; jCC sequence. Note we do not handle the
floating point version of this sequence (Fdbcc).
OPERANDS are as in the two peepholes. CODE is the code
returned by m68k_output_branch_<mode>. */
@@ -2069,16 +2071,29 @@ m68k_decompose_index (rtx x, bool strict_p, struct m68k_address *address)
/* Check for a scale factor. */
scale = 1;
- if ((TARGET_68020 || TARGET_COLDFIRE)
- && GET_CODE (x) == MULT
- && GET_CODE (XEXP (x, 1)) == CONST_INT
- && (INTVAL (XEXP (x, 1)) == 2
- || INTVAL (XEXP (x, 1)) == 4
- || (INTVAL (XEXP (x, 1)) == 8
- && (TARGET_COLDFIRE_FPU || !TARGET_COLDFIRE))))
+ if (TARGET_68020 || TARGET_COLDFIRE)
{
- scale = INTVAL (XEXP (x, 1));
- x = XEXP (x, 0);
+ if (GET_CODE (x) == MULT
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (INTVAL (XEXP (x, 1)) == 2
+ || INTVAL (XEXP (x, 1)) == 4
+ || (INTVAL (XEXP (x, 1)) == 8
+ && (TARGET_COLDFIRE_FPU || !TARGET_COLDFIRE))))
+ {
+ scale = INTVAL (XEXP (x, 1));
+ x = XEXP (x, 0);
+ }
+ /* LRA uses ASHIFT instead of MULT outside of MEM. */
+ else if (GET_CODE (x) == ASHIFT
+ && GET_CODE (XEXP (x, 1)) == CONST_INT
+ && (INTVAL (XEXP (x, 1)) == 1
+ || INTVAL (XEXP (x, 1)) == 2
+ || (INTVAL (XEXP (x, 1)) == 3
+ && (TARGET_COLDFIRE_FPU || !TARGET_COLDFIRE))))
+ {
+ scale = 1 << INTVAL (XEXP (x, 1));
+ x = XEXP (x, 0);
+ }
}
/* Check for a word extension. */
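For clarity, a sketch (not from the patch) of the two index forms handled above; LRA canonicalizes a scaled index as a shift when it appears outside a MEM:

    /* Inside a MEM the scaled index is a MULT:
         (plus:SI (mult:SI (reg:SI A) (const_int 4)) (reg:SI B))
       but outside a MEM, LRA uses an ASHIFT instead:
         (plus:SI (ashift:SI (reg:SI A) (const_int 2)) (reg:SI B))
       and the new branch converts the shift count back into a scale,
       scale = 1 << 2 == 4.  */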
@@ -2246,8 +2261,10 @@ m68k_decompose_address (machine_mode mode, rtx x,
??? do_tablejump creates these addresses before placing the target
label, so we have to assume that unplaced labels are jump table
references. It seems unlikely that we would ever generate indexed
- accesses to unplaced labels in other cases. */
+ accesses to unplaced labels in other cases. Do not accept it in
+ PIC mode, since the label address will need to be loaded from memory. */
if (GET_CODE (x) == PLUS
+ && !flag_pic
&& m68k_jump_table_ref_p (XEXP (x, 1))
&& m68k_decompose_index (XEXP (x, 0), strict_p, address))
{
@@ -2335,7 +2352,8 @@ m68k_legitimate_mem_p (rtx x, struct m68k_address *address)
{
return (MEM_P (x)
&& m68k_decompose_address (GET_MODE (x), XEXP (x, 0),
- reload_in_progress || reload_completed,
+ (reload_in_progress || lra_in_progress
+ || reload_completed),
address));
}
@@ -2610,19 +2628,19 @@ m68k_wrap_symbol_into_got_ref (rtx x, enum m68k_reloc reloc, rtx temp_reg)
/* Legitimize PIC addresses. If the address is already
position-independent, we return ORIG. Newly generated
position-independent addresses go to REG. If we need more
- than one register, we lose.
+ than one register, we lose.
An address is legitimized by making an indirect reference
through the Global Offset Table with the name of the symbol
- used as an offset.
+ used as an offset.
- The assembler and linker are responsible for placing the
+ The assembler and linker are responsible for placing the
address of the symbol in the GOT. The function prologue
is responsible for initializing a5 to the starting address
of the GOT.
The assembler is also responsible for translating a symbol name
- into a constant displacement from the start of the GOT.
+ into a constant displacement from the start of the GOT.
A quick example may make things a little clearer:
@@ -2642,9 +2660,9 @@ m68k_wrap_symbol_into_got_ref (rtx x, enum m68k_reloc reloc, rtx temp_reg)
movel a5@(_foo:w), a0
movel #12345, a0@
-
- That (in a nutshell) is how *all* symbol and label references are
+
+ That (in a nutshell) is how *all* symbol and label references are
handled. */
rtx
@@ -2673,7 +2691,7 @@ legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED,
/* legitimize both operands of the PLUS */
gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
-
+
base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
base == reg ? 0 : reg);
@@ -2735,13 +2753,13 @@ m68k_call_tls_get_addr (rtx x, rtx eqv, enum m68k_reloc reloc)
is the simpliest way of generating a call. The difference between
__tls_get_addr() and libcall is that the result is returned in D0
instead of A0. To workaround this, we use m68k_libcall_value_in_a0_p
- which temporarily switches returning the result to A0. */
+ which temporarily switches returning the result to A0. */
m68k_libcall_value_in_a0_p = true;
a0 = emit_library_call_value (m68k_get_tls_get_addr (), NULL_RTX, LCT_PURE,
Pmode, x, Pmode);
m68k_libcall_value_in_a0_p = false;
-
+
insns = get_insns ();
end_sequence ();
@@ -2769,7 +2787,7 @@ m68k_get_m68k_read_tp (void)
/* Emit instruction sequence that calls __m68k_read_tp.
A pseudo register with result of __m68k_read_tp call is returned. */
-static rtx
+static rtx
m68k_call_m68k_read_tp (void)
{
rtx a0;
@@ -2783,7 +2801,7 @@ m68k_call_m68k_read_tp (void)
is the simpliest way of generating a call. The difference between
__m68k_read_tp() and libcall is that the result is returned in D0
instead of A0. To workaround this, we use m68k_libcall_value_in_a0_p
- which temporarily switches returning the result to A0. */
+ which temporarily switches returning the result to A0. */
/* Emit the call sequence. */
m68k_libcall_value_in_a0_p = true;
@@ -2822,7 +2840,7 @@ m68k_legitimize_tls_address (rtx orig)
rtx eqv;
rtx a0;
rtx x;
-
+
/* Attach a unique REG_EQUIV, to allow the RTL optimizers to
share the LDM result with other LD model accesses. */
eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
@@ -3068,12 +3086,17 @@ m68k_rtx_costs (rtx x, machine_mode mode, int outer_code,
/* An lea costs about three times as much as a simple add. */
if (mode == SImode
&& GET_CODE (XEXP (x, 1)) == REG
- && GET_CODE (XEXP (x, 0)) == MULT
- && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
- && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
- && (INTVAL (XEXP (XEXP (x, 0), 1)) == 2
- || INTVAL (XEXP (XEXP (x, 0), 1)) == 4
- || INTVAL (XEXP (XEXP (x, 0), 1)) == 8))
+ && ((GET_CODE (XEXP (x, 0)) == MULT
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && (INTVAL (XEXP (XEXP (x, 0), 1)) == 2
+ || INTVAL (XEXP (XEXP (x, 0), 1)) == 4
+ || INTVAL (XEXP (XEXP (x, 0), 1)) == 8))
+ || (GET_CODE (XEXP (x, 0)) == ASHIFT
+ && GET_CODE (XEXP (XEXP (x, 0), 0)) == REG
+ && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
+ && ((unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1))
+ <= 3))))
{
/* lea an@(dx:l:i),am */
*total = COSTS_N_INSNS (TARGET_COLDFIRE ? 2 : 3);
@@ -3877,11 +3900,13 @@ emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
rtx tem;
if (scratch_reg
- && reload_in_progress && GET_CODE (operand0) == REG
+ && (reload_in_progress || lra_in_progress)
+ && GET_CODE (operand0) == REG
&& REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
operand0 = reg_equiv_mem (REGNO (operand0));
else if (scratch_reg
- && reload_in_progress && GET_CODE (operand0) == SUBREG
+ && (reload_in_progress || lra_in_progress)
+ && GET_CODE (operand0) == SUBREG
&& GET_CODE (SUBREG_REG (operand0)) == REG
&& REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
{
@@ -3894,11 +3919,13 @@ emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
}
if (scratch_reg
- && reload_in_progress && GET_CODE (operand1) == REG
+ && (reload_in_progress || lra_in_progress)
+ && GET_CODE (operand1) == REG
&& REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
operand1 = reg_equiv_mem (REGNO (operand1));
else if (scratch_reg
- && reload_in_progress && GET_CODE (operand1) == SUBREG
+ && (reload_in_progress || lra_in_progress)
+ && GET_CODE (operand1) == SUBREG
&& GET_CODE (SUBREG_REG (operand1)) == REG
&& REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
{
@@ -3910,11 +3937,13 @@ emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
operand1 = alter_subreg (&temp, true);
}
- if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
+ if (scratch_reg && (reload_in_progress || lra_in_progress)
+ && GET_CODE (operand0) == MEM
&& ((tem = find_replacement (&XEXP (operand0, 0)))
!= XEXP (operand0, 0)))
operand0 = gen_rtx_MEM (GET_MODE (operand0), tem);
- if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
+ if (scratch_reg && (reload_in_progress || lra_in_progress)
+ && GET_CODE (operand1) == MEM
&& ((tem = find_replacement (&XEXP (operand1, 0)))
!= XEXP (operand1, 0)))
operand1 = gen_rtx_MEM (GET_MODE (operand1), tem);
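
Each reload_in_progress test in emit_move_sequence (and in the m68k.md and predicates.md hunks further down) gains an || lra_in_progress, so the backend behaves the same whether old reload or LRA is resolving pseudos. As a hedged sketch only: the repeated condition could be factored into one predicate; reload_in_progress and lra_in_progress are existing GCC flags, but the helper name and the stand-in declarations below are hypothetical and serve purely to show the combined test.

  #include <stdbool.h>

  /* Stand-ins for GCC's reload_in_progress / lra_in_progress flags.  */
  static bool reload_in_progress, lra_in_progress;

  /* Hypothetical helper: true while either reload pass is active.  */
  static bool
  reload_pass_in_progress (void)
  {
    return reload_in_progress || lra_in_progress;
  }

  int
  main (void)
  {
    return reload_pass_in_progress () ? 1 : 0;
  }
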
@@ -4819,7 +4848,7 @@ output_move_const_single (rtx *operands)
to get the desired constant. */
/* This code has been fixed for cross-compilation. */
-
+
static int inited_68881_table = 0;
static const char *const strings_68881[7] = {
@@ -4887,7 +4916,7 @@ standard_68881_constant_p (rtx x)
if (real_identical (r, &values_68881[i]))
return (codes_68881[i]);
}
-
+
if (GET_MODE (x) == SFmode)
return 0;
@@ -5176,7 +5205,7 @@ m68k_delegitimize_address (rtx orig_x)
unspec = XEXP (addr.offset, 0);
if (GET_CODE (unspec) == PLUS && CONST_INT_P (XEXP (unspec, 1)))
unspec = XEXP (unspec, 0);
- if (GET_CODE (unspec) != UNSPEC
+ if (GET_CODE (unspec) != UNSPEC
|| (XINT (unspec, 1) != UNSPEC_RELOC16
&& XINT (unspec, 1) != UNSPEC_RELOC32))
return orig_x;
@@ -5197,7 +5226,7 @@ m68k_delegitimize_address (rtx orig_x)
x = replace_equiv_address_nv (orig_x, x);
return x;
}
-
+
/* A C compound statement to output to stdio stream STREAM the
assembler syntax for an instruction operand that is a memory
@@ -5211,7 +5240,7 @@ m68k_delegitimize_address (rtx orig_x)
It is possible for PIC to generate a (plus (label_ref...) (reg...))
and we handle that just like we would a (plus (symbol_ref...) (reg...)).
- This routine is responsible for distinguishing between -fpic and -fPIC
+ This routine is responsible for distinguishing between -fpic and -fPIC
style relocations in an address. When generating -fpic code the
offset is output in word mode (e.g. movel a5@(_foo:w), a0). When generating
-fPIC code the offset is output in long mode (e.g. movel a5@(_foo:l), a0) */
@@ -6632,7 +6661,7 @@ m68k_sched_variable_issue (FILE *sched_dump ATTRIBUTE_UNUSED,
case CPU_CFV3:
insn_size = sched_get_attr_size_int (insn);
-
+
/* ColdFire V3 and V4 cores have instruction buffers that can
accumulate up to 8 instructions regardless of instructions'
sizes. So we should take care not to "prefetch" 24 one-word
diff --git a/gcc/config/m68k/m68k.md b/gcc/config/m68k/m68k.md
index e5c2528..1c9a6bf 100644
--- a/gcc/config/m68k/m68k.md
+++ b/gcc/config/m68k/m68k.md
@@ -957,11 +957,12 @@
/* The source is an address which requires PIC relocation.
Call legitimize_pic_address with the source, mode, and a relocation
register (a new pseudo, or the final destination if reload_in_progress
- is set). Then fall through normally */
- rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
+ or lra_in_progress is set). Then fall through normally */
+ rtx temp = ((reload_in_progress || lra_in_progress)
+ ? operands[0] : gen_reg_rtx (Pmode));
operands[1] = legitimize_pic_address (operands[1], SImode, temp);
}
- else if (flag_pic && TARGET_PCREL && ! reload_in_progress)
+ else if (flag_pic && TARGET_PCREL && ! (reload_in_progress || lra_in_progress))
{
/* Don't allow writes to memory except via a register;
the m68k doesn't consider PC-relative addresses to be writable. */
@@ -1452,7 +1453,7 @@
""
{
/* We can't rewrite operands during reload. */
- if (! reload_in_progress)
+ if (! (reload_in_progress || lra_in_progress))
{
if (CONSTANT_P (operands[1]))
{
diff --git a/gcc/config/m68k/m68kelf.h b/gcc/config/m68k/m68kelf.h
index 0af1951..f53e40f 100644
--- a/gcc/config/m68k/m68kelf.h
+++ b/gcc/config/m68k/m68kelf.h
@@ -104,7 +104,7 @@ do { \
#define DEBUGGER_REGNO(REGNO) (REGNO)
#if 0
-/* SVR4 m68k assembler is bitching on the `comm i,1,1' which askes for
+/* SVR4 m68k assembler is bitching on the `comm i,1,1' which askes for
1 byte alignment. Don't generate alignment for COMMON seems to be
safer until we the assembler is fixed. */
#undef ASM_OUTPUT_ALIGNED_COMMON
@@ -126,7 +126,7 @@ do { \
/* Currently, JUMP_TABLES_IN_TEXT_SECTION must be defined in order to
keep switch tables in the text section. */
-
+
#define JUMP_TABLES_IN_TEXT_SECTION 1
/* In m68k svr4, using swbeg is the standard way to do switch
diff --git a/gcc/config/m68k/netbsd-elf.h b/gcc/config/m68k/netbsd-elf.h
index 6fc5ad1..3d2043b 100644
--- a/gcc/config/m68k/netbsd-elf.h
+++ b/gcc/config/m68k/netbsd-elf.h
@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3. If not see
} \
while (0)
-/* Don't try using XFmode on the 68010. */
+/* Don't try using XFmode on the 68010. */
#undef LONG_DOUBLE_TYPE_MODE
#define LONG_DOUBLE_TYPE_MODE (TARGET_68020 ? XFmode : DFmode)
diff --git a/gcc/config/m68k/predicates.md b/gcc/config/m68k/predicates.md
index 46fc379..787e544 100644
--- a/gcc/config/m68k/predicates.md
+++ b/gcc/config/m68k/predicates.md
@@ -237,6 +237,7 @@
|| (TARGET_68881
&& (!standard_68881_constant_p (op)
|| reload_in_progress
+ || lra_in_progress
|| reload_completed)));
})
diff --git a/gcc/config/mcore/mcore-elf.h b/gcc/config/mcore/mcore-elf.h
index 6c522c7..c367bad 100644
--- a/gcc/config/mcore/mcore-elf.h
+++ b/gcc/config/mcore/mcore-elf.h
@@ -1,4 +1,4 @@
-/* Definitions of MCore target.
+/* Definitions of MCore target.
Copyright (C) 1998-2024 Free Software Foundation, Inc.
Contributed by Cygnus Solutions.
@@ -78,7 +78,7 @@ along with GCC; see the file COPYING3. If not see
ASM_OUTPUT_LABEL(FILE, NAME); \
} \
while (0)
-
+
/* Output the size directive for a decl in rest_of_decl_compilation
in the case where we did not do so before the initializer.
Once we find the error_mark_node, we know that the value of
@@ -121,5 +121,5 @@ along with GCC; see the file COPYING3. If not see
#define CTORS_SECTION_ASM_OP "\t.section\t.ctors,\"aw\""
#undef DTORS_SECTION_ASM_OP
#define DTORS_SECTION_ASM_OP "\t.section\t.dtors,\"aw\""
-
+
#endif /* __MCORE_ELF_H__ */
diff --git a/gcc/config/mcore/mcore.cc b/gcc/config/mcore/mcore.cc
index ee58c8f..99c0a6c 100644
--- a/gcc/config/mcore/mcore.cc
+++ b/gcc/config/mcore/mcore.cc
@@ -293,12 +293,12 @@ output_stack_adjust (int direction, int size)
emit_insn (gen_movsi (nval, val));
val = nval;
}
-
+
if (direction > 0)
insn = gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, val);
else
insn = gen_subsi3 (stack_pointer_rtx, stack_pointer_rtx, val);
-
+
emit_insn (insn);
}
}
@@ -311,7 +311,7 @@ calc_live_regs (int * count)
{
int reg;
int live_regs_mask = 0;
-
+
* count = 0;
for (reg = 0; reg < FIRST_PSEUDO_REGISTER; reg++)
@@ -336,7 +336,7 @@ mcore_print_operand_address (FILE * stream, machine_mode /*mode*/, rtx x)
case REG:
fprintf (stream, "(%s)", reg_names[REGNO (x)]);
break;
-
+
case PLUS:
{
rtx base = XEXP (x, 0);
@@ -463,25 +463,25 @@ mcore_const_costs (rtx exp, enum rtx_code code)
HOST_WIDE_INT val = INTVAL (exp);
/* Easy constants. */
- if ( CONST_OK_FOR_I (val)
- || CONST_OK_FOR_M (val)
- || CONST_OK_FOR_N (val)
+ if ( CONST_OK_FOR_I (val)
+ || CONST_OK_FOR_M (val)
+ || CONST_OK_FOR_N (val)
|| (code == PLUS && CONST_OK_FOR_L (val)))
- return 1;
+ return 1;
else if (code == AND
&& ( CONST_OK_FOR_M (~val)
|| CONST_OK_FOR_N (~val)))
return 2;
- else if (code == PLUS
- && ( CONST_OK_FOR_I (-val)
- || CONST_OK_FOR_M (-val)
- || CONST_OK_FOR_N (-val)))
- return 2;
+ else if (code == PLUS
+ && ( CONST_OK_FOR_I (-val)
+ || CONST_OK_FOR_M (-val)
+ || CONST_OK_FOR_N (-val)))
+ return 2;
- return 5;
+ return 5;
}
-/* What does an and instruction cost - we do this b/c immediates may
+/* What does an and instruction cost - we do this b/c immediates may
have been relaxed. We want to ensure that cse will cse relaxed immeds
out. Otherwise we'll get bad code (multiple reloads of the same const). */
@@ -494,7 +494,7 @@ mcore_and_cost (rtx x)
return 2;
val = INTVAL (XEXP (x, 1));
-
+
/* Do it directly. */
if (CONST_OK_FOR_K (val) || CONST_OK_FOR_M (~val))
return 2;
@@ -530,7 +530,7 @@ mcore_ior_cost (rtx x)
/* Takes two instructions to load. */
else if (TARGET_HARDLIT && mcore_const_ok_for_inline (val))
return 4;
-
+
/* Takes a lrw to load. */
return 5;
}
@@ -572,7 +572,7 @@ mcore_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
case FIX:
*total = COSTS_N_INSNS (100);
return true;
-
+
default:
return false;
}
@@ -590,7 +590,7 @@ mcore_gen_compare (enum rtx_code code, rtx op0, rtx op1)
if (GET_CODE (op1) == CONST_INT)
{
HOST_WIDE_INT val = INTVAL (op1);
-
+
switch (code)
{
case GTU:
@@ -610,12 +610,12 @@ mcore_gen_compare (enum rtx_code code, rtx op0, rtx op1)
code = code == LE ? LT : GE;
}
break;
-
+
default:
break;
}
}
-
+
if (CONSTANT_P (op1) && GET_CODE (op1) != CONST_INT)
op1 = force_reg (SImode, op1);
@@ -628,7 +628,7 @@ mcore_gen_compare (enum rtx_code code, rtx op0, rtx op1)
code = NE;
invert = true;
/* FALLTHRU */
-
+
case NE: /* Use normal condition, cmpne. */
if (GET_CODE (op1) == CONST_INT && ! CONST_OK_FOR_K (INTVAL (op1)))
op1 = force_reg (SImode, op1);
@@ -638,7 +638,7 @@ mcore_gen_compare (enum rtx_code code, rtx op0, rtx op1)
code = GT;
invert = true;
/* FALLTHRU */
-
+
case GT: /* Use normal condition, reversed cmplt. */
if (GET_CODE (op1) == CONST_INT)
op1 = force_reg (SImode, op1);
@@ -648,9 +648,9 @@ mcore_gen_compare (enum rtx_code code, rtx op0, rtx op1)
code = LT;
invert = true;
/* FALLTHRU */
-
+
case LT: /* Use normal condition, cmplt. */
- if (GET_CODE (op1) == CONST_INT &&
+ if (GET_CODE (op1) == CONST_INT &&
/* covered by btsti x,31. */
INTVAL (op1) != 0 &&
! CONST_OK_FOR_J (INTVAL (op1)))
@@ -663,7 +663,7 @@ mcore_gen_compare (enum rtx_code code, rtx op0, rtx op1)
code = LEU;
invert = true;
/* FALLTHRU */
-
+
case LEU: /* Use normal condition, reversed cmphs. */
if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0)
op1 = force_reg (SImode, op1);
@@ -673,7 +673,7 @@ mcore_gen_compare (enum rtx_code code, rtx op0, rtx op1)
code = GEU;
invert = true;
/* FALLTHRU */
-
+
case GEU: /* Use normal condition, cmphs. */
if (GET_CODE (op1) == CONST_INT && INTVAL (op1) != 0)
op1 = force_reg (SImode, op1);
@@ -712,13 +712,13 @@ mcore_output_call (rtx operands[], int index)
{
static char buffer[20];
rtx addr = operands [index];
-
+
if (REG_P (addr))
{
if (TARGET_CG_DATA)
{
gcc_assert (mcore_current_function_name);
-
+
ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name,
"unknown", 1);
}
@@ -731,11 +731,11 @@ mcore_output_call (rtx operands[], int index)
{
gcc_assert (mcore_current_function_name);
gcc_assert (GET_CODE (addr) == SYMBOL_REF);
-
+
ASM_OUTPUT_CG_EDGE (asm_out_file, mcore_current_function_name,
XSTR (addr, 0), 0);
}
-
+
sprintf (buffer, "jbsr\t%%%d", index);
}
@@ -749,15 +749,15 @@ const_ok_for_mcore (HOST_WIDE_INT value)
{
if (value >= 0 && value <= 127)
return 1;
-
+
/* Try exact power of two. */
if (CONST_OK_FOR_M (value))
return 1;
-
+
/* Try exact power of two - 1. */
if (CONST_OK_FOR_N (value) && value != -1)
return 1;
-
+
return 0;
}
@@ -767,7 +767,7 @@ int
mcore_const_ok_for_inline (HOST_WIDE_INT value)
{
HOST_WIDE_INT x, y;
-
+
return try_constant_tricks (value, & x, & y) > 0;
}
@@ -778,12 +778,12 @@ mcore_const_trick_uses_not (HOST_WIDE_INT value)
{
HOST_WIDE_INT x, y;
- return try_constant_tricks (value, & x, & y) == 2;
-}
+ return try_constant_tricks (value, & x, & y) == 2;
+}
/* Try tricks to load a constant inline and return the trick number if
success (0 is non-inlinable).
-
+
0: not inlinable
1: single instruction (do the usual thing)
2: single insn followed by a 'not'
@@ -805,8 +805,8 @@ try_constant_tricks (HOST_WIDE_INT value, HOST_WIDE_INT * x, HOST_WIDE_INT * y)
if (const_ok_for_mcore (value))
return 1; /* Do the usual thing. */
-
- if (! TARGET_HARDLIT)
+
+ if (! TARGET_HARDLIT)
return 0;
if (const_ok_for_mcore (~value))
@@ -912,13 +912,13 @@ try_constant_tricks (HOST_WIDE_INT value, HOST_WIDE_INT * x, HOST_WIDE_INT * y)
return 11;
}
-
+
return 0;
}
/* Check whether reg is dead at first. This is done by searching ahead
for either the next use (i.e., reg is live), a death note, or a set of
- reg. Don't just use dead_or_set_p() since reload does not always mark
+ reg. Don't just use dead_or_set_p() since reload does not always mark
deaths (especially if PRESERVE_DEATH_NOTES_REGNO_P is not defined). We
can ignore subregs by extracting the actual register. BRC */
@@ -1032,11 +1032,11 @@ mcore_output_bseti (rtx dst, int mask)
if ((mask & 0x1) == 0x1)
{
out_operands[1] = GEN_INT (bit);
-
+
output_asm_insn ("bseti\t%0,%1", out_operands);
}
mask >>= 1;
- }
+ }
return "";
}
@@ -1056,12 +1056,12 @@ mcore_output_bclri (rtx dst, int mask)
if ((mask & 0x1) == 0x0)
{
out_operands[1] = GEN_INT (bit);
-
+
output_asm_insn ("bclri\t%0,%1", out_operands);
}
-
+
mask >>= 1;
- }
+ }
return "";
}
@@ -1098,7 +1098,7 @@ mcore_output_cmov (rtx operands[], int cmp_t, const char * test)
/* First output the test if folded into the pattern. */
- if (test)
+ if (test)
output_asm_insn (test, operands);
/* Load the constant - for now, only support constants that can be
@@ -1111,7 +1111,7 @@ mcore_output_cmov (rtx operands[], int cmp_t, const char * test)
output_asm_insn ("bgeni\t%0,%P1", out_operands);
else if (CONST_OK_FOR_N (load_value))
output_asm_insn ("bmaski\t%0,%N1", out_operands);
-
+
/* Output the constant adjustment. */
if (load_value > adjust_value)
{
@@ -1131,7 +1131,7 @@ mcore_output_cmov (rtx operands[], int cmp_t, const char * test)
return "";
}
-/* Outputs the peephole for moving a constant that gets not'ed followed
+/* Outputs the peephole for moving a constant that gets not'ed followed
by an and (i.e. combine the not and the and into andn). BRC */
const char *
@@ -1152,15 +1152,15 @@ mcore_output_andn (rtx insn ATTRIBUTE_UNUSED, rtx operands[])
if (x >= 0 && x <= 127)
load_op = "movi\t%0,%1";
-
+
/* Try exact power of two. */
else if (CONST_OK_FOR_M (x))
load_op = "bgeni\t%0,%P1";
-
+
/* Try exact power of two - 1. */
else if (CONST_OK_FOR_N (x))
load_op = "bmaski\t%0,%N1";
-
+
else
{
load_op = "BADMOVI-andn\t%0, %1";
@@ -1193,14 +1193,14 @@ output_inline_const (machine_mode mode, rtx operands[])
turned into lrw's. Our caller uses try_constant_tricks to back
off to an lrw rather than calling this routine. */
gcc_assert (trick_no != 0);
-
+
if (trick_no == 1)
x = value;
/* operands: 0 = dst, 1 = load immed., 2 = immed. adjustment. */
out_operands[0] = operands[0];
out_operands[1] = GEN_INT (x);
-
+
if (trick_no > 2)
out_operands[2] = GEN_INT (y);
@@ -1212,20 +1212,20 @@ output_inline_const (machine_mode mode, rtx operands[])
if (x >= 0 && x <= 127)
sprintf (load_op, "movi\t%s,%%1", dst_fmt);
-
+
/* Try exact power of two. */
else if (CONST_OK_FOR_M (x))
sprintf (load_op, "bgeni\t%s,%%P1", dst_fmt);
-
+
/* Try exact power of two - 1. */
else if (CONST_OK_FOR_N (x))
sprintf (load_op, "bmaski\t%s,%%N1", dst_fmt);
-
+
else
{
sprintf (load_op, "BADMOVI-inline_const %s, %%1", dst_fmt);
gcc_unreachable ();
- }
+ }
switch (trick_no)
{
@@ -1266,7 +1266,7 @@ output_inline_const (machine_mode mode, rtx operands[])
default:
return "";
}
-
+
output_asm_insn (buf, out_operands);
return "";
@@ -1284,15 +1284,15 @@ mcore_output_move (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
if (GET_CODE (dst) == REG)
{
if (GET_CODE (src) == REG)
- {
+ {
if (REGNO (src) == CC_REG) /* r-c */
- return "mvc\t%0";
- else
+ return "mvc\t%0";
+ else
return "mov\t%0,%1"; /* r-r*/
}
else if (GET_CODE (src) == MEM)
{
- if (GET_CODE (XEXP (src, 0)) == LABEL_REF)
+ if (GET_CODE (XEXP (src, 0)) == LABEL_REF)
return "lrw\t%0,[%1]"; /* a-R */
else
switch (GET_MODE (src)) /* r-m */
@@ -1310,7 +1310,7 @@ mcore_output_move (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
else if (GET_CODE (src) == CONST_INT)
{
HOST_WIDE_INT x, y;
-
+
if (CONST_OK_FOR_I (INTVAL (src))) /* r-I */
return "movi\t%0,%1";
else if (CONST_OK_FOR_M (INTVAL (src))) /* r-M */
@@ -1319,7 +1319,7 @@ mcore_output_move (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
return "bmaski\t%0,%N1\t// %1 %x1";
else if (try_constant_tricks (INTVAL (src), &x, &y)) /* R-P */
return output_inline_const (SImode, operands); /* 1-2 insns */
- else
+ else
return "lrw\t%0,%x1\t// %1"; /* Get it from literal pool. */
}
else
@@ -1357,7 +1357,7 @@ mcore_output_movedouble (rtx operands[], machine_mode mode ATTRIBUTE_UNUSED)
{
int dstreg = REGNO (dst);
int srcreg = REGNO (src);
-
+
/* Ensure the second source not overwritten. */
if (srcreg + 1 == dstreg)
return "mov %R0,%R1\n\tmov %0,%1";
@@ -1369,10 +1369,10 @@ mcore_output_movedouble (rtx operands[], machine_mode mode ATTRIBUTE_UNUSED)
rtx memexp = XEXP (src, 0);
int dstreg = REGNO (dst);
int basereg = -1;
-
+
if (GET_CODE (memexp) == LABEL_REF)
return "lrw\t%0,[%1]\n\tlrw\t%R0,[%R1]";
- else if (GET_CODE (memexp) == REG)
+ else if (GET_CODE (memexp) == REG)
basereg = REGNO (memexp);
else if (GET_CODE (memexp) == PLUS)
{
@@ -1391,7 +1391,7 @@ mcore_output_movedouble (rtx operands[], machine_mode mode ATTRIBUTE_UNUSED)
{
/* Just load them in reverse order. */
return "ldw\t%R0,%R1\n\tldw\t%0,%1";
-
+
/* XXX: alternative: move basereg to basereg+1
and then fall through. */
}
@@ -1449,7 +1449,7 @@ mcore_arith_S_operand (rtx op)
{
if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (~INTVAL (op)))
return 1;
-
+
return 0;
}
@@ -1484,7 +1484,7 @@ mcore_expand_insv (rtx operands[])
gen_rtx_IOR (SImode, operands[0],
GEN_INT (mask))));
}
-
+
return 1;
}
@@ -1496,7 +1496,7 @@ mcore_expand_insv (rtx operands[])
if (width == 8 && posn % 8 == 0)
/* Byte sized and aligned; let caller break it up. */
return 0;
-
+
if (width == 16 && posn % 16 == 0)
/* Short sized and aligned; let caller break it up. */
return 0;
@@ -1539,7 +1539,7 @@ mcore_expand_insv (rtx operands[])
bits. */
if (width + posn != (int) GET_MODE_SIZE (SImode))
{
- ereg = force_reg (SImode, GEN_INT ((1 << width) - 1));
+ ereg = force_reg (SImode, GEN_INT ((1 << width) - 1));
emit_insn (gen_rtx_SET (sreg, gen_rtx_AND (SImode, sreg, ereg)));
}
@@ -1547,7 +1547,7 @@ mcore_expand_insv (rtx operands[])
if (posn != 0)
emit_insn (gen_rtx_SET (sreg, gen_rtx_ASHIFT (SImode, sreg,
GEN_INT (posn))));
-
+
emit_insn (gen_rtx_SET (operands[0],
gen_rtx_IOR (SImode, operands[0], sreg)));
@@ -1630,7 +1630,7 @@ block_move_sequence (rtx dst_mem, rtx src_mem, int size, int align)
if (active[phase])
{
active[phase] = false;
-
+
x = adjust_address (dst_mem, mode[phase], offset_st);
emit_insn (gen_rtx_SET (x, temp[phase]));
@@ -1712,11 +1712,11 @@ layout_mcore_frame (struct mcore_frame * infp)
/* Might have to spill bytes to re-assemble a big argument that
was passed partially in registers and partially on the stack. */
nbytes = crtl->args.pretend_args_size;
-
+
/* Determine how much space for spilled anonymous args (e.g., stdarg). */
if (current_function_anonymous_args)
nbytes += (NPARM_REGS - number_of_regs_before_varargs) * UNITS_PER_WORD;
-
+
infp->arg_size = nbytes;
/* How much space to save non-volatile registers we stomp. */
@@ -1730,7 +1730,7 @@ layout_mcore_frame (struct mcore_frame * infp)
/* Make sure we have a whole number of words for the locals. */
if (infp->local_size % STACK_BYTES)
infp->local_size = (infp->local_size + STACK_BYTES - 1) & ~ (STACK_BYTES -1);
-
+
/* Only thing we know we have to pad is the outbound space, since
we've aligned our locals assuming that base of locals is aligned. */
infp->pad_local = 0;
@@ -1765,23 +1765,23 @@ layout_mcore_frame (struct mcore_frame * infp)
step = localregarg + infp->pad_reg;
infp->reg_offset = infp->local_size;
-
+
if (outbounds + step <= ADDI_REACH && !frame_pointer_needed)
{
step += outbounds;
infp->reg_offset += outbounds;
outbounds = 0;
}
-
+
infp->arg_offset = step - 4;
infp->growth[growths++] = step;
infp->reg_growth = growths;
infp->local_growth = growths;
-
+
/* If we haven't already folded it in. */
if (outbounds)
infp->growth[growths++] = outbounds;
-
+
goto finish;
}
@@ -1803,7 +1803,7 @@ layout_mcore_frame (struct mcore_frame * infp)
step = ADDI_REACH; /* As much up front as we can. */
if (step > all)
step = all;
-
+
/* XXX: Consider whether step will still be aligned; we believe so. */
infp->arg_offset = step - 4;
infp->growth[growths++] = step;
@@ -1829,7 +1829,7 @@ layout_mcore_frame (struct mcore_frame * infp)
/* Finish off if we need to do so. */
if (outbounds)
infp->growth[growths++] = outbounds;
-
+
goto finish;
}
@@ -1845,28 +1845,28 @@ layout_mcore_frame (struct mcore_frame * infp)
if (infp->local_size % STACK_BYTES)
infp->pad_local = STACK_BYTES - (infp->local_size % STACK_BYTES);
-
+
step = infp->local_size + infp->pad_local;
-
+
if (!frame_pointer_needed)
{
step += outbounds;
outbounds = 0;
}
-
+
infp->growth[growths++] = step;
infp->local_growth = growths;
/* If there's any left to be done. */
if (outbounds)
infp->growth[growths++] = outbounds;
-
+
goto finish;
}
/* XXX: optimizations that we'll want to play with....
-- regarg is not aligned, but it's a small number of registers;
- use some of localsize so that regarg is aligned and then
+ use some of localsize so that regarg is aligned and then
save the registers. */
/* Simple encoding; plods down the stack buying the pieces as it goes.
@@ -1875,27 +1875,27 @@ layout_mcore_frame (struct mcore_frame * infp)
-- but it is safe for all alignments. */
if (regarg % STACK_BYTES != 0)
infp->pad_reg = STACK_BYTES - (regarg % STACK_BYTES);
-
+
infp->growth[growths++] = infp->arg_size + infp->reg_size + infp->pad_reg;
infp->reg_growth = growths;
infp->arg_offset = infp->growth[0] - 4;
infp->reg_offset = 0;
-
+
if (frame_pointer_needed)
{
if (infp->local_size % STACK_BYTES != 0)
infp->pad_local = STACK_BYTES - (infp->local_size % STACK_BYTES);
-
+
infp->growth[growths++] = infp->local_size + infp->pad_local;
infp->local_growth = growths;
-
+
infp->growth[growths++] = outbounds;
}
else
{
if ((infp->local_size + outbounds) % STACK_BYTES != 0)
infp->pad_local = STACK_BYTES - ((infp->local_size + outbounds) % STACK_BYTES);
-
+
infp->growth[growths++] = infp->local_size + infp->pad_local + outbounds;
infp->local_growth = growths;
}
@@ -1904,7 +1904,7 @@ layout_mcore_frame (struct mcore_frame * infp)
finish:
gcc_assert (infp->reg_offset >= 0);
gcc_assert (growths <= MAX_STACK_GROWS);
-
+
for (i = 0; i < growths; i++)
gcc_assert (!(infp->growth[i] % STACK_BYTES));
}
@@ -1956,12 +1956,12 @@ mcore_setup_incoming_varargs (cumulative_args_t args_so_far_v,
number_of_regs_before_varargs = *args_so_far;
if (!TYPE_NO_NAMED_ARGS_STDARG_P (TREE_TYPE (current_function_decl)))
number_of_regs_before_varargs += mcore_num_arg_regs (arg.mode, arg.type);
-
+
/* There is a bug somewhere in the arg handling code.
Until I can find it this workaround always pushes the
last named argument onto the stack. */
number_of_regs_before_varargs = *args_so_far;
-
+
/* The last named argument may be split between argument registers
and the stack. Allow for this here. */
if (number_of_regs_before_varargs > NPARM_REGS)
@@ -1977,7 +1977,7 @@ mcore_expand_prolog (void)
/* Find out what we're doing. */
layout_mcore_frame (&fi);
-
+
space_allocated = fi.arg_size + fi.reg_size + fi.local_size +
fi.outbound_size + fi.pad_outbound + fi.pad_local + fi.pad_reg;
@@ -1987,17 +1987,17 @@ mcore_expand_prolog (void)
rtx x;
x = DECL_RTL (current_function_decl);
-
+
gcc_assert (GET_CODE (x) == MEM);
-
+
x = XEXP (x, 0);
-
+
gcc_assert (GET_CODE (x) == SYMBOL_REF);
-
+
free (mcore_current_function_name);
-
+
mcore_current_function_name = xstrdup (XSTR (x, 0));
-
+
ASM_OUTPUT_CG_NODE (asm_out_file, mcore_current_function_name, space_allocated);
if (cfun->calls_alloca)
@@ -2017,7 +2017,7 @@ mcore_expand_prolog (void)
if (mcore_naked_function_p ())
return;
-
+
/* Handle stdarg+regsaves in one shot: can't be more than 64 bytes. */
output_stack_adjust (-1, fi.growth[growth++]); /* Grows it. */
@@ -2048,7 +2048,7 @@ mcore_expand_prolog (void)
{
int i;
int offs = fi.reg_offset;
-
+
for (i = 15; i >= 0; i--)
{
if (offs == 0 && i == 15 && ((fi.reg_mask & 0xc000) == 0xc000))
@@ -2084,7 +2084,7 @@ mcore_expand_prolog (void)
/* If we haven't already purchased to 'fp'. */
if (growth < fi.local_growth)
output_stack_adjust (-1, fi.growth[growth++]); /* Grows it. */
-
+
emit_insn (gen_movsi (frame_pointer_rtx, stack_pointer_rtx));
/* ... and then go any remaining distance for outbounds, etc. */
@@ -2108,7 +2108,7 @@ mcore_expand_epilog (void)
int offs;
int growth = MAX_STACK_GROWS - 1 ;
-
+
/* Find out what we're doing. */
layout_mcore_frame(&fi);
@@ -2137,9 +2137,9 @@ mcore_expand_epilog (void)
register save information back off the stack. */
while (growth >= fi.reg_growth)
output_stack_adjust ( 1, fi.growth[growth--]);
-
+
offs = fi.reg_offset;
-
+
for (i = 15; i >= 0; i--)
{
if (offs == 0 && i == 15 && ((fi.reg_mask & 0xc000) == 0xc000))
@@ -2148,10 +2148,10 @@ mcore_expand_epilog (void)
/* Find the starting register. */
first_reg = 15;
-
+
while (fi.reg_mask & (1 << first_reg))
first_reg--;
-
+
first_reg++;
emit_insn (gen_load_multiple (gen_rtx_REG (SImode, first_reg),
@@ -2257,16 +2257,16 @@ mcore_output_jump_label_table (void)
if (pool_size)
{
fprintf (asm_out_file, "\t.align 2\n");
-
+
for (i = 0; i < pool_size; i++)
{
pool_node * p = pool_vector + i;
(*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (p->label));
-
+
output_asm_insn (".long %0", &p->value);
}
-
+
pool_size = 0;
}
@@ -2279,7 +2279,7 @@ static cond_type
is_cond_candidate (rtx insn)
{
/* The only things we conditionalize are those that can be directly
- changed into a conditional. Only bother with SImode items. If
+ changed into a conditional. Only bother with SImode items. If
we wanted to be a little more aggressive, we could also do other
modes such as DImode with reg-reg move or load 0. */
if (NONJUMP_INSN_P (insn))
@@ -2296,7 +2296,7 @@ is_cond_candidate (rtx insn)
GET_CODE (dst) != SUBREG) ||
GET_MODE (dst) != SImode)
return COND_NO;
-
+
src = XEXP (pat, 1);
if ((GET_CODE (src) == REG ||
@@ -2304,7 +2304,7 @@ is_cond_candidate (rtx insn)
GET_CODE (SUBREG_REG (src)) == REG)) &&
GET_MODE (src) == SImode)
return COND_MOV_INSN;
- else if (GET_CODE (src) == CONST_INT &&
+ else if (GET_CODE (src) == CONST_INT &&
INTVAL (src) == 0)
return COND_CLR_INSN;
else if (GET_CODE (src) == PLUS &&
@@ -2330,7 +2330,7 @@ is_cond_candidate (rtx insn)
/* Some insns that we don't bother with:
(set (rx:DI) (ry:DI))
(set (rx:DI) (const_int 0))
- */
+ */
}
else if (JUMP_P (insn)
@@ -2369,7 +2369,7 @@ emit_new_cond_insn (rtx_insn *insn, int cond)
switch (num)
{
- case COND_MOV_INSN:
+ case COND_MOV_INSN:
case COND_CLR_INSN:
if (cond)
c_insn = gen_movt0 (dst, src, dst);
@@ -2383,7 +2383,7 @@ emit_new_cond_insn (rtx_insn *insn, int cond)
else
c_insn = gen_incscc_false (dst, dst);
break;
-
+
case COND_DEC_INSN:
if (cond)
c_insn = gen_decscc (dst, dst);
@@ -2411,7 +2411,7 @@ emit_new_cond_insn (rtx_insn *insn, int cond)
used any more beyond this point for the mcore). */
REG_NOTES (c_insn) = REG_NOTES (insn);
}
-
+
if (num == COND_BRANCH_INSN)
{
/* For jumps, we need to be a little bit careful and emit the new jump
@@ -2419,32 +2419,32 @@ emit_new_cond_insn (rtx_insn *insn, int cond)
This way, the barrier following the old (uncond) jump will get
deleted, but the label won't. */
c_insn = emit_jump_insn_before (c_insn, insn);
-
+
++ LABEL_NUSES (dst);
-
+
JUMP_LABEL (c_insn) = dst;
}
else
c_insn = emit_insn_after (c_insn, insn);
delete_insn (insn);
-
+
return as_a <rtx_insn *> (c_insn);
}
/* Attempt to change a basic block into a series of conditional insns. This
- works by taking the branch at the end of the 1st block and scanning for the
+ works by taking the branch at the end of the 1st block and scanning for the
end of the 2nd block. If all instructions in the 2nd block have cond.
versions and the label at the start of block 3 is the same as the target
from the branch at block 1, then conditionalize all insn in block 2 using
the inverse condition of the branch at block 1. (Note I'm bending the
definition of basic block here.)
- e.g., change:
+ e.g., change:
bt L2 <-- end of block 1 (delete)
- mov r7,r8
- addu r7,1
+ mov r7,r8
+ addu r7,1
br L3 <-- end of block 2
L2: ... <-- start of block 3 (NUSES==1)
@@ -2473,7 +2473,7 @@ conditionalize_block (rtx_insn *first)
int br_lab_num;
int blk_size = 0;
-
+
/* Check that the first insn is a candidate conditional jump. This is
the one that we'll eliminate. If not, advance to the next insn to
try. */
@@ -2506,12 +2506,12 @@ conditionalize_block (rtx_insn *first)
/* Scan forward for the start of block 2: it must start with a
label and that label must be the same as the branch target
label from block 1. We don't care about whether block 2 actually
- ends with a branch or a label (an uncond. branch is
+ ends with a branch or a label (an uncond. branch is
conditionalizable). */
for (insn = NEXT_INSN (first); insn; insn = NEXT_INSN (insn))
{
enum rtx_code code;
-
+
code = GET_CODE (insn);
/* Look for the label at the start of block 3. */
@@ -2523,7 +2523,7 @@ conditionalize_block (rtx_insn *first)
just return the next insn so we can start over from that point. */
if (code != BARRIER && code != NOTE && !is_cond_candidate (insn))
return NEXT_INSN (insn);
-
+
/* Remember the last real insn before the label (i.e. end of block 2). */
if (code == JUMP_INSN || code == INSN)
{
@@ -2534,16 +2534,16 @@ conditionalize_block (rtx_insn *first)
if (!insn)
return insn;
-
- /* It is possible for this optimization to slow performance if the blocks
- are long. This really depends upon whether the branch is likely taken
+
+ /* It is possible for this optimization to slow performance if the blocks
+ are long. This really depends upon whether the branch is likely taken
or not. If the branch is taken, we slow performance in many cases. But,
- if the branch is not taken, we always help performance (for a single
- block, but for a double block (i.e. when the optimization is re-applied)
+ if the branch is not taken, we always help performance (for a single
+ block, but for a double block (i.e. when the optimization is re-applied)
this is not true since the 'right thing' depends on the overall length of
- the collapsed block). As a compromise, don't apply this optimization on
+ the collapsed block). As a compromise, don't apply this optimization on
blocks larger than size 2 (unlikely for the mcore) when speed is important.
- the best threshold depends on the latencies of the instructions (i.e.,
+ the best threshold depends on the latencies of the instructions (i.e.,
the branch penalty). */
if (optimize > 1 && blk_size > 2)
return insn;
@@ -2552,16 +2552,16 @@ conditionalize_block (rtx_insn *first)
it is the destination of the branch from block 1. Also, all
instructions in the block 2 are conditionalizable. So, apply the
conditionalization and delete the branch. */
- start_blk_3_lab = insn;
-
- for (insn = NEXT_INSN (end_blk_1_br); insn != start_blk_3_lab;
+ start_blk_3_lab = insn;
+
+ for (insn = NEXT_INSN (end_blk_1_br); insn != start_blk_3_lab;
insn = NEXT_INSN (insn))
{
rtx_insn *newinsn;
if (insn->deleted ())
continue;
-
+
/* Try to form a conditional variant of the instruction and emit it. */
if ((newinsn = emit_new_cond_insn (insn, cond)))
{
@@ -2573,7 +2573,7 @@ conditionalize_block (rtx_insn *first)
}
/* Note whether we will delete the label starting blk 3 when the jump
- gets deleted. If so, we want to re-apply this optimization at the
+ gets deleted. If so, we want to re-apply this optimization at the
last real instruction right before the label. */
if (LABEL_NUSES (start_blk_3_lab) == 1)
{
@@ -2588,7 +2588,7 @@ conditionalize_block (rtx_insn *first)
if (! start_blk_3_lab)
return end_blk_2_insn;
-
+
/* Return the insn right after the label at the start of block 3. */
return NEXT_INSN (start_blk_3_lab);
}
@@ -2597,8 +2597,8 @@ conditionalize_block (rtx_insn *first)
outer loop that traverses through the insns scanning for a branch
that signifies an opportunity to apply the optimization. Note that
this optimization is applied late. If we could apply it earlier,
- say before cse 2, it may expose more optimization opportunities.
- but, the pay back probably isn't really worth the effort (we'd have
+ say before cse 2, it may expose more optimization opportunities.
+ but, the pay back probably isn't really worth the effort (we'd have
to update all reg/flow/notes/links/etc to make it work - and stick it
in before cse 2). */
@@ -2618,10 +2618,10 @@ mcore_reorg (void)
{
/* Reset this variable. */
current_function_anonymous_args = 0;
-
+
if (optimize == 0)
return;
-
+
/* Conditionalize blocks where we can. */
conditionalize_optimization ();
@@ -2687,7 +2687,7 @@ mcore_is_same_reg (rtx x, rtx y)
/* Strip any and all of the subreg wrappers. */
while (GET_CODE (x) == SUBREG)
x = SUBREG_REG (x);
-
+
while (GET_CODE (y) == SUBREG)
y = SUBREG_REG (y);
@@ -2706,7 +2706,7 @@ mcore_option_override (void)
}
-/* Compute the number of word sized registers needed to
+/* Compute the number of word sized registers needed to
hold a function argument of mode MODE and type TYPE. */
int
@@ -2745,11 +2745,11 @@ handle_structs_in_regs (machine_mode mode, const_tree type, int reg)
&& (size % UNITS_PER_WORD != 0)
&& (reg + mcore_num_arg_regs (mode, type) <= (FIRST_PARM_REG + NPARM_REGS)))
{
- rtx arg_regs [NPARM_REGS];
+ rtx arg_regs [NPARM_REGS];
int nregs;
rtx result;
rtvec rtvec;
-
+
for (nregs = 0; size > 0; size -= UNITS_PER_WORD)
{
arg_regs [nregs] =
@@ -2762,11 +2762,11 @@ handle_structs_in_regs (machine_mode mode, const_tree type, int reg)
gcc_assert (ARRAY_SIZE (arg_regs) == 6);
rtvec = gen_rtvec (nregs, arg_regs[0], arg_regs[1], arg_regs[2],
arg_regs[3], arg_regs[4], arg_regs[5]);
-
+
result = gen_rtx_PARALLEL (mode, rtvec);
return result;
}
-
+
return gen_rtx_REG (mode, reg);
}
@@ -2775,12 +2775,12 @@ mcore_function_value (const_tree valtype, const_tree func)
{
machine_mode mode;
int unsigned_p;
-
+
mode = TYPE_MODE (valtype);
/* Since we promote return types, we must promote the mode here too. */
mode = promote_function_mode (valtype, mode, &unsigned_p, func, 1);
-
+
return handle_structs_in_regs (mode, valtype, FIRST_RET_REG);
}
@@ -2801,7 +2801,7 @@ static rtx
mcore_function_arg (cumulative_args_t cum, const function_arg_info &arg)
{
int arg_reg;
-
+
if (!arg.named || arg.end_marker_p ())
return 0;
@@ -2809,7 +2809,7 @@ mcore_function_arg (cumulative_args_t cum, const function_arg_info &arg)
return 0;
arg_reg = ROUND_REG (*get_cumulative_args (cum), arg.mode);
-
+
if (arg_reg < NPARM_REGS)
return handle_structs_in_regs (arg.mode, arg.type,
FIRST_PARM_REG + arg_reg);
@@ -2852,7 +2852,7 @@ mcore_arg_partial_bytes (cumulative_args_t cum, const function_arg_info &arg)
if (targetm.calls.must_pass_in_stack (arg))
return 0;
-
+
/* REG is not the *hardware* register number of the register that holds
the argument, it is the *argument* register number. So for example,
the first argument to a function goes in argument register 0, which
@@ -2904,12 +2904,12 @@ mcore_mark_dllexport (tree decl)
tree idp;
rtlname = XEXP (DECL_RTL (decl), 0);
-
+
if (GET_CODE (rtlname) == MEM)
rtlname = XEXP (rtlname, 0);
gcc_assert (GET_CODE (rtlname) == SYMBOL_REF);
oldname = XSTR (rtlname, 0);
-
+
if (mcore_dllexport_name_p (oldname))
return; /* Already done. */
@@ -2939,12 +2939,12 @@ mcore_mark_dllimport (tree decl)
rtx newrtl;
rtlname = XEXP (DECL_RTL (decl), 0);
-
+
if (GET_CODE (rtlname) == MEM)
rtlname = XEXP (rtlname, 0);
gcc_assert (GET_CODE (rtlname) == SYMBOL_REF);
oldname = XSTR (rtlname, 0);
-
+
gcc_assert (!mcore_dllexport_name_p (oldname));
if (mcore_dllimport_name_p (oldname))
return; /* Already done. */
@@ -2960,7 +2960,7 @@ mcore_mark_dllimport (tree decl)
error ("initialized variable %q+D is marked dllimport", decl);
return;
}
-
+
/* `extern' needn't be specified with dllimport.
Specify `extern' now and hope for the best. Sigh. */
if (VAR_P (decl)
@@ -3019,7 +3019,7 @@ mcore_encode_section_info (tree decl, rtx rtl ATTRIBUTE_UNUSED, int first ATTRIB
mcore_mark_dllexport (decl);
else if (mcore_dllimport_p (decl))
mcore_mark_dllimport (decl);
-
+
/* It might be that DECL has already been marked as dllimport, but
a subsequent definition nullified that. The attribute is gone
but DECL_RTL still has @i.__imp_foo. We need to remove that. */
@@ -3084,7 +3084,7 @@ mcore_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
const char * prefix;
name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
-
+
/* Strip off any encoding in name. */
name = (* targetm.strip_name_encoding) (name);
@@ -3099,10 +3099,10 @@ mcore_unique_section (tree decl, int reloc ATTRIBUTE_UNUSED)
prefix = ".rdata$";
else
prefix = ".data$";
-
+
len = strlen (name) + strlen (prefix);
string = XALLOCAVEC (char, len + 1);
-
+
sprintf (string, "%s%s", prefix, name);
set_decl_section_name (decl, string);
@@ -3124,7 +3124,7 @@ mcore_warn_func_return (tree decl)
#ifdef OBJECT_FORMAT_ELF
static void
-mcore_asm_named_section (const char *name,
+mcore_asm_named_section (const char *name,
unsigned int flags ATTRIBUTE_UNUSED,
tree decl ATTRIBUTE_UNUSED)
{
@@ -3214,13 +3214,13 @@ mcore_reg_ok_for_base_p (const_rtx reg, bool strict_p)
static bool
mcore_base_register_rtx_p (const_rtx x, bool strict_p)
{
- return REG_P(x) && mcore_reg_ok_for_base_p (x, strict_p);
+ return REG_P(x) && mcore_reg_ok_for_base_p (x, strict_p);
}
/* A legitimate index for a QI is 0..15, for HI is 0..30, for SI is 0..60,
and for DI is 0..56 because we use two SI loads, etc. */
-static bool
+static bool
mcore_legitimate_index_p (machine_mode mode, const_rtx op)
{
if (CONST_INT_P (op))
@@ -3237,11 +3237,11 @@ mcore_legitimate_index_p (machine_mode mode, const_rtx op)
if (GET_MODE_SIZE (mode) == 1
&& ((unsigned HOST_WIDE_INT) INTVAL (op)) <= 15)
return true;
- }
+ }
return false;
}
-
+
/* Worker function for TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P.
Allow REG
diff --git a/gcc/config/mcore/mcore.h b/gcc/config/mcore/mcore.h
index 36dc860..17502e0 100644
--- a/gcc/config/mcore/mcore.h
+++ b/gcc/config/mcore/mcore.h
@@ -22,7 +22,7 @@
#define GCC_MCORE_H
/* RBE: need to move these elsewhere. */
-#undef LIKE_PPC_ABI
+#undef LIKE_PPC_ABI
#define MCORE_STRUCT_ARGS
/* RBE: end of "move elsewhere". */
@@ -80,7 +80,7 @@
#define TARGET_8ALIGN 1
extern char * mcore_current_function_name;
-
+
/* Target machine storage Layout. */
#define PROMOTE_MODE(MODE,UNSIGNEDP,TYPE) \
@@ -136,7 +136,7 @@ extern char * mcore_current_function_name;
/* Every structures size must be a multiple of 8 bits. */
#define STRUCTURE_SIZE_BOUNDARY 8
-/* Look at the fundamental type that is used for a bit-field and use
+/* Look at the fundamental type that is used for a bit-field and use
that to impose alignment on the enclosing structure.
struct s {int a:8}; should have same alignment as "int", not "char". */
#define PCC_BITFIELD_TYPE_MATTERS 1
@@ -150,14 +150,14 @@ extern char * mcore_current_function_name;
(TREE_CODE (TYPE) == ARRAY_TYPE \
&& TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
&& (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
-
+
/* Set this nonzero if move instructions will actually fail to work
when given unaligned data. */
#define STRICT_ALIGNMENT 1
/* Standard register usage. */
-/* Register allocation for our first guess
+/* Register allocation for our first guess
r0 stack pointer
r1 scratch, target reg for xtrb?
@@ -333,7 +333,7 @@ extern const enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
but prevents the compiler from extending the lifetime of these
registers. */
#define TARGET_SMALL_REGISTER_CLASSES_FOR_MODE_P hook_bool_mode_true
-
+
/* The class value for index registers, and the one for base regs. */
#define INDEX_REG_CLASS NO_REGS
#define BASE_REG_CLASS GENERAL_REGS
@@ -369,7 +369,7 @@ extern const enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
mcore_secondary_reload_class (CLASS, MODE, X)
/* Return the maximum number of consecutive registers
- needed to represent mode MODE in a register of class CLASS.
+ needed to represent mode MODE in a register of class CLASS.
On MCore this is the size of MODE in words. */
#define CLASS_MAX_NREGS(CLASS, MODE) \
@@ -434,9 +434,9 @@ extern const enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
#define ROUND_ADVANCE(SIZE) \
((SIZE + UNITS_PER_WORD - 1) / UNITS_PER_WORD)
-/* Round a register number up to a proper boundary for an arg of mode
- MODE.
-
+/* Round a register number up to a proper boundary for an arg of mode
+ MODE.
+
We round to an even reg for things larger than a word. */
#define ROUND_REG(X, MODE) \
((TARGET_8ALIGN \
@@ -486,7 +486,7 @@ extern const enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
#define REGNO_OK_FOR_INDEX_P(REGNO) 0
-/* Maximum number of registers that can appear in a valid memory
+/* Maximum number of registers that can appear in a valid memory
address. */
#define MAX_REGS_PER_ADDRESS 1
@@ -587,7 +587,7 @@ extern const enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
reg_names[STACK_POINTER_REGNUM], \
(STACK_BOUNDARY / BITS_PER_UNIT))
-
+
/* Output a reference to a label. */
#undef ASM_OUTPUT_LABELREF
#define ASM_OUTPUT_LABELREF(STREAM, NAME) \
@@ -614,8 +614,8 @@ extern const enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
0 a call from src to dst
1 the call is special (e.g. dst is "unknown" or "alloca")
2 the call is special (e.g., the src is a table instead of routine)
-
- Frame sizes are augmented with timestamps to help later tools
+
+ Frame sizes are augmented with timestamps to help later tools
differentiate between static entities with same names in different
files. */
extern long mcore_current_compilation_timestamp;
@@ -673,7 +673,7 @@ extern long mcore_current_compilation_timestamp;
/* This says how to output an assembler line
to define a global common symbol, with alignment information. */
-/* XXX - for now we ignore the alignment. */
+/* XXX - for now we ignore the alignment. */
#undef ASM_OUTPUT_ALIGNED_COMMON
#define ASM_OUTPUT_ALIGNED_COMMON(FILE, NAME, SIZE, ALIGN) \
do \
diff --git a/gcc/config/microblaze/microblaze-c.cc b/gcc/config/microblaze/microblaze-c.cc
index e60783e..52cc82b 100644
--- a/gcc/config/microblaze/microblaze-c.cc
+++ b/gcc/config/microblaze/microblaze-c.cc
@@ -30,10 +30,10 @@
#define builtin_define(TXT) cpp_define (pfile, TXT)
#define builtin_assert(TXT) cpp_assert (pfile, TXT)
-/* Define preprocessor symbols for MicroBlaze.
+/* Define preprocessor symbols for MicroBlaze.
Symbols which do not start with __ are deprecated. */
-void
+void
microblaze_cpp_define (cpp_reader *pfile)
{
builtin_assert ("cpu=microblaze");
@@ -52,7 +52,7 @@ microblaze_cpp_define (cpp_reader *pfile)
builtin_define ("__BIG_ENDIAN__");
builtin_define ("__MICROBLAZEEB__");
}
- if (!TARGET_SOFT_MUL)
+ if (!TARGET_SOFT_MUL)
{
if (!flag_iso)
builtin_define ("HAVE_HW_MUL");
@@ -100,4 +100,4 @@ microblaze_cpp_define (cpp_reader *pfile)
builtin_define ("HAVE_HW_FPU_SQRT");
builtin_define ("__HAVE_HW_FPU_SQRT__");
}
-}
+}
diff --git a/gcc/config/microblaze/microblaze-protos.h b/gcc/config/microblaze/microblaze-protos.h
index ae97cc2..7a25f03a 100644
--- a/gcc/config/microblaze/microblaze-protos.h
+++ b/gcc/config/microblaze/microblaze-protos.h
@@ -37,7 +37,7 @@ extern bool microblaze_expand_block_move (rtx, rtx, rtx, rtx);
extern void microblaze_expand_divide (rtx *);
extern void microblaze_expand_conditional_branch (machine_mode, rtx *);
extern void microblaze_expand_conditional_branch_reg (machine_mode, rtx *);
-extern void microblaze_expand_conditional_branch_sf (rtx *);
+extern void microblaze_expand_conditional_branch_sf (rtx *);
extern int microblaze_can_use_return_insn (void);
extern void print_operand (FILE *, rtx, int);
extern void print_operand_address (FILE *, rtx);
@@ -65,6 +65,6 @@ extern void microblaze_eh_return (rtx op0);
#endif /* RTX_CODE */
/* Declare functions in microblaze-c.cc. */
-extern void microblaze_cpp_define (struct cpp_reader *);
+extern void microblaze_cpp_define (struct cpp_reader *);
#endif /* GCC_MICROBLAZE_PROTOS_H */
diff --git a/gcc/config/microblaze/microblaze.cc b/gcc/config/microblaze/microblaze.cc
index 98ec611..c036969 100644
--- a/gcc/config/microblaze/microblaze.cc
+++ b/gcc/config/microblaze/microblaze.cc
@@ -65,8 +65,8 @@ An invalid address.
ADDRESS_REG
-A natural register or a register + const_int offset address.
-The register satisfies microblaze_valid_base_register_p and the
+A natural register or a register + const_int offset address.
+The register satisfies microblaze_valid_base_register_p and the
offset is a const_arith_operand.
ADDRESS_REG_INDEX
@@ -99,7 +99,7 @@ enum microblaze_address_type
/* Classifies symbols
SYMBOL_TYPE_GENERAL
-
+
A general symbol. */
enum microblaze_symbol_type
{
@@ -120,7 +120,7 @@ enum tls_reloc {
struct microblaze_address_info
{
enum microblaze_address_type type;
- rtx regA; /* Contains valid values on ADDRESS_REG, ADDRESS_REG_INDEX,
+ rtx regA; /* Contains valid values on ADDRESS_REG, ADDRESS_REG_INDEX,
ADDRESS_SYMBOLIC. */
rtx regB; /* Contains valid values on ADDRESS_REG_INDEX. */
rtx offset; /* Contains valid values on ADDRESS_CONST_INT and ADDRESS_REG. */
@@ -143,7 +143,7 @@ struct GTY(()) microblaze_frame_info {
int initialized; /* != 0 if frame size already calculated. */
int num_gp; /* number of gp registers saved. */
long insns_len; /* length of insns. */
- int alloc_stack; /* Flag to indicate if the current function
+ int alloc_stack; /* Flag to indicate if the current function
must not create stack space. (As an optimization). */
};
@@ -158,18 +158,18 @@ static GTY(()) int microblaze_sched_use_dfa = 0;
data area takes 2 instructions). */
int microblaze_section_threshold = -1;
-/* Prevent scheduling potentially exception causing instructions in
+/* Prevent scheduling potentially exception causing instructions in
delay slots. -mcpu=v3.00.a or v4.00.a turns this on. */
int microblaze_no_unsafe_delay;
/* Set to one if the targeted core has the CLZ insn. */
int microblaze_has_clz = 0;
-/* Which CPU pipeline do we use. We haven't really standardized on a CPU
- version having only a particular type of pipeline. There can still be
- options on the CPU to scale pipeline features up or down. :(
- Bad Presentation (??), so we let the MD file rely on the value of
- this variable instead Making PIPE_5 the default. It should be backward
+/* Which CPU pipeline do we use. We haven't really standardized on a CPU
+ version having only a particular type of pipeline. There can still be
+ options on the CPU to scale pipeline features up or down. :(
+ Bad Presentation (??), so we let the MD file rely on the value of
+ this variable instead Making PIPE_5 the default. It should be backward
optimal with PIPE_3 MicroBlazes. */
enum pipeline_type microblaze_pipe = MICROBLAZE_PIPE_5;
@@ -210,7 +210,7 @@ enum reg_class microblaze_regno_to_class[] =
};
/* MicroBlaze specific machine attributes.
- interrupt_handler - Interrupt handler attribute to add interrupt prologue
+ interrupt_handler - Interrupt handler attribute to add interrupt prologue
and epilogue and use appropriate interrupt return.
save_volatiles - Similar to interrupt handler, but use normal return. */
int interrupt_handler;
@@ -719,8 +719,8 @@ get_base_reg (rtx x)
const_int
ADDRESS_REG_INDEX %0 %1 NULL NULL
- ADDRESS_SYMBOLIC r0 / NULL NULL symbol
- sda_base_reg
+ ADDRESS_SYMBOLIC r0 / NULL NULL symbol
+ sda_base_reg
ADDRESS_CONST_INT r0 NULL const NULL
@@ -1005,7 +1005,7 @@ microblaze_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
result = gen_rtx_PLUS (Pmode, ptr_reg, constant);
if (SMALL_INT (constant))
return result;
- /* Otherwise we fall through so the code below will fix the
+ /* Otherwise we fall through so the code below will fix the
constant. */
xinsn = result;
}
@@ -1363,7 +1363,7 @@ microblaze_rtx_costs (rtx x, machine_mode mode, int outer_code ATTRIBUTE_UNUSED,
*total -= 2;
}
else
- /* Double the worst cost of shifts when there is no barrel shifter and
+ /* Double the worst cost of shifts when there is no barrel shifter and
the shift amount is in a reg. */
*total = COSTS_N_INSNS (32 * 4);
return true;
@@ -1498,7 +1498,7 @@ microblaze_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
return COSTS_N_INSNS (microblaze_address_insns (addr, GET_MODE (addr)));
}
-/* Return nonzero if X is an address which needs a temporary register when
+/* Return nonzero if X is an address which needs a temporary register when
reloaded while generating PIC code. */
int
@@ -1680,7 +1680,7 @@ function_arg_partial_bytes (cumulative_args_t cum_v,
return 0;
}
-/* Convert a version number of the form "vX.YY.Z" to an integer encoding
+/* Convert a version number of the form "vX.YY.Z" to an integer encoding
for easier range comparison. */
static int
microblaze_version_to_int (const char *version)
@@ -1794,7 +1794,7 @@ microblaze_option_override (void)
}
else
{
- /* We agree to use 5 pipe-stage model even on area optimized 3
+ /* We agree to use 5 pipe-stage model even on area optimized 3
pipe-stage variants. */
#if 0
microblaze_select_flags &= ~(MICROBLAZE_MASK_NO_UNSAFE_DELAY);
@@ -1807,7 +1807,7 @@ microblaze_option_override (void)
|| MICROBLAZE_VERSION_COMPARE (microblaze_select_cpu,
"v5.00.c") == 0)
{
- /* Pattern compares are to be turned on by default only when
+ /* Pattern compares are to be turned on by default only when
compiling for MB v5.00.'z'. */
target_flags |= MASK_PATTERN_COMPARE;
}
@@ -2039,7 +2039,7 @@ microblaze_must_save_register (int regno)
if (microblaze_is_interrupt_variant ())
{
- if (df_regs_ever_live_p (regno)
+ if (df_regs_ever_live_p (regno)
|| regno == MB_ABI_MSR_SAVE_REG
|| ((interrupt_handler || fast_interrupt)
&& (regno == MB_ABI_ASM_TEMP_REGNUM
@@ -2109,7 +2109,7 @@ microblaze_must_save_register (int regno)
*/
static HOST_WIDE_INT
-compute_frame_size (HOST_WIDE_INT size)
+compute_frame_size (HOST_WIDE_INT size)
{
int regno;
HOST_WIDE_INT total_size; /* # bytes that the entire frame takes up. */
@@ -2207,7 +2207,7 @@ microblaze_can_eliminate (const int from, const int to)
}
/* Implement INITIAL_ELIMINATION_OFFSET. FROM is either the frame
- pointer or argument pointer or the return address pointer. TO is either
+ pointer or argument pointer or the return address pointer. TO is either
the stack pointer or hard frame pointer. */
HOST_WIDE_INT
@@ -2240,7 +2240,7 @@ microblaze_initial_elimination_offset (int from, int to)
}
/* Print operands using format code.
-
+
The MicroBlaze specific codes are:
'X' X is CONST_INT, prints 32 bits in hexadecimal format = "0x%08x",
@@ -2267,7 +2267,7 @@ microblaze_initial_elimination_offset (int from, int to)
'j' Print low word of const_double (int or float) value as hex
's' Print -1 if operand is negative, 0 if positive (sign extend)
'@' Print the name of the temporary register (rMB_ABI_ASM_TEMP_REGNUM).
- '#' Print nop if the delay slot of a branch is not filled.
+ '#' Print nop if the delay slot of a branch is not filled.
*/
void
@@ -2463,7 +2463,7 @@ print_operand (FILE * file, rtx op, int letter)
val[1] = INTVAL (op) & 0x00000000ffffffffLL;
if (val[0] == 0 && val[1] < 0)
val[0] = -1;
-
+
}
fprintf (file, "0x%8.8lx", (letter == 'h') ? val[0] : val[1]);
}
@@ -2543,19 +2543,19 @@ print_operand (FILE * file, rtx op, int letter)
reference whose address is ADDR. ADDR is an RTL expression.
Possible address classifications and output formats are,
-
+
ADDRESS_REG "%0, r0"
ADDRESS_REG with non-zero "%0, <addr_const>"
- offset
+ offset
- ADDRESS_REG_INDEX "rA, RB"
+ ADDRESS_REG_INDEX "rA, RB"
(if rA is r0, rA and rB are swapped)
ADDRESS_CONST_INT "r0, <addr_const>"
- ADDRESS_SYMBOLIC "rBase, <addr_const>"
- (rBase is a base register suitable for the
+ ADDRESS_SYMBOLIC "rBase, <addr_const>"
+ (rBase is a base register suitable for the
symbol's type)
*/
@@ -2576,7 +2576,7 @@ print_operand_address (FILE * file, rtx addr)
break;
case ADDRESS_REG_INDEX:
if (REGNO (info.regA) == 0)
- /* Make rB == r0 instead of rA == r0. This helps reduce read port
+ /* Make rB == r0 instead of rA == r0. This helps reduce read port
congestion. */
fprintf (file, "%s,%s", reg_names[REGNO (info.regB)],
reg_names[REGNO (info.regA)]);
@@ -2641,7 +2641,7 @@ print_operand_address (FILE * file, rtx addr)
}
/* Emit either a label, .comm, or .lcomm directive, and mark that the symbol
- is used, so that we don't emit an .extern for it in
+ is used, so that we don't emit an .extern for it in
microblaze_asm_file_end. */
void
@@ -2649,7 +2649,7 @@ microblaze_declare_object (FILE * stream, const char *name,
const char *section, const char *fmt, int size)
{
- fputs (section, stream);
+ fputs (section, stream);
assemble_name (stream, name);
fprintf (stream, fmt, size);
}
@@ -2662,7 +2662,7 @@ microblaze_declare_object (FILE * stream, const char *name,
#define BITSET_P(VALUE,BIT) (((VALUE) & (1L << (BIT))) != 0)
-/* Save or restore instructions based on whether this is the prologue or
+/* Save or restore instructions based on whether this is the prologue or
epilogue. prologue is 1 for the prologue. */
static void
save_restore_insns (int prologue)
@@ -2892,7 +2892,7 @@ microblaze_expand_prologue (void)
&& !cfun->returns_pcc_struct)
{
tree type = build_pointer_type (fntype);
- tree function_result_decl = build_decl (BUILTINS_LOCATION, PARM_DECL,
+ tree function_result_decl = build_decl (BUILTINS_LOCATION, PARM_DECL,
NULL_TREE, type);
DECL_ARG_TYPE (function_result_decl) = type;
@@ -3108,7 +3108,7 @@ microblaze_expand_epilogue (void)
rtx reg_rtx;
rtx mem_rtx;
- /* In case of interrupt handlers use addki instead of addi for changing the
+ /* In case of interrupt handlers use addki instead of addi for changing the
stack pointer value. */
if (microblaze_can_use_return_insn ())
@@ -3121,9 +3121,9 @@ microblaze_expand_epilogue (void)
if (fsiz > 0)
{
- /* Restore SUB_RETURN_ADDR_REGNUM at first. This is to prevent the
- sequence of load-followed by a use (in rtsd) in every prologue. Saves
- a load-use stall cycle :) This is also important to handle alloca.
+ /* Restore SUB_RETURN_ADDR_REGNUM at first. This is to prevent the
+ sequence of load-followed by a use (in rtsd) in every prologue. Saves
+ a load-use stall cycle :) This is also important to handle alloca.
(See comments for if (frame_pointer_needed) below. */
if (!crtl->is_leaf || interrupt_handler)
@@ -3138,11 +3138,11 @@ microblaze_expand_epilogue (void)
emit_move_insn (reg_rtx, mem_rtx);
}
- /* It is important that this is done after we restore the return address
- register (above). When alloca is used, we want to restore the
- sub-routine return address only from the current stack top and not
- from the frame pointer (which we restore below). (frame_pointer + 0)
- might have been over-written since alloca allocates memory on the
+ /* It is important that this is done after we restore the return address
+ register (above). When alloca is used, we want to restore the
+ sub-routine return address only from the current stack top and not
+ from the frame pointer (which we restore below). (frame_pointer + 0)
+ might have been over-written since alloca allocates memory on the
current stack. */
if (frame_pointer_needed)
emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
@@ -3186,8 +3186,8 @@ microblaze_can_use_return_insn (void)
/* Implement TARGET_SECONDARY_RELOAD. */
static reg_class_t
-microblaze_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x ATTRIBUTE_UNUSED,
- reg_class_t rclass, machine_mode mode ATTRIBUTE_UNUSED,
+microblaze_secondary_reload (bool in_p ATTRIBUTE_UNUSED, rtx x ATTRIBUTE_UNUSED,
+ reg_class_t rclass, machine_mode mode ATTRIBUTE_UNUSED,
secondary_reload_info *sri ATTRIBUTE_UNUSED)
{
if (rclass == ST_REGS)
@@ -3263,7 +3263,7 @@ microblaze_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
case SECCAT_RODATA_MERGE_STR:
case SECCAT_RODATA_MERGE_STR_INIT:
/* MB binutils have various issues with mergeable string sections and
- relaxation/relocation. Currently, turning mergeable sections
+ relaxation/relocation. Currently, turning mergeable sections
into regular readonly sections. */
return readonly_data_section;
@@ -3274,7 +3274,7 @@ microblaze_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
/*
Encode info about sections into the RTL based on a symbol's declaration.
- The default definition of this hook, default_encode_section_info in
+ The default definition of this hook, default_encode_section_info in
`varasm.cc', sets a number of commonly-useful bits in SYMBOL_REF_FLAGS. */
static void
@@ -3521,7 +3521,7 @@ microblaze_eh_return (rtx op0)
If the string size is below the threshold, put it into .sdata2.
If the front-end is done, we must be being called from toplev.cc.
In that case, do nothing. */
-void
+void
microblaze_asm_output_ident (const char *string)
{
const char *section_asm_op;
@@ -3695,7 +3695,7 @@ microblaze_expand_divide (rtx operands[])
{
/* Table lookup software divides. Works for all (nr/dr) where (0 <= nr,dr <= 15). */
- rtx regt1 = gen_reg_rtx (SImode);
+ rtx regt1 = gen_reg_rtx (SImode);
rtx reg18 = gen_rtx_REG (SImode, R_TMP);
rtx regqi = gen_reg_rtx (QImode);
rtx_code_label *div_label = gen_label_rtx ();
@@ -3707,9 +3707,9 @@ microblaze_expand_divide (rtx operands[])
insn = emit_insn (gen_iorsi3 (regt1, operands[1], operands[2]));
cjump = emit_jump_insn_after (gen_cbranchsi4 (
- gen_rtx_GTU (SImode, regt1, GEN_INT (15)),
+ gen_rtx_GTU (SImode, regt1, GEN_INT (15)),
regt1, GEN_INT (15), div_label), insn);
- LABEL_NUSES (div_label) = 1;
+ LABEL_NUSES (div_label) = 1;
JUMP_LABEL (cjump) = div_label;
emit_insn (gen_rtx_CLOBBER (SImode, reg18));
@@ -3718,21 +3718,21 @@ microblaze_expand_divide (rtx operands[])
mem_rtx = gen_rtx_MEM (QImode,
gen_rtx_PLUS (Pmode, regt1, div_table_rtx));
- insn = emit_insn (gen_movqi (regqi, mem_rtx));
+ insn = emit_insn (gen_movqi (regqi, mem_rtx));
insn = emit_insn (gen_movsi (operands[0], gen_rtx_SUBREG (SImode, regqi, 0)));
- jump = emit_jump_insn_after (gen_jump (div_end_label), insn);
+ jump = emit_jump_insn_after (gen_jump (div_end_label), insn);
JUMP_LABEL (jump) = div_end_label;
- LABEL_NUSES (div_end_label) = 1;
+ LABEL_NUSES (div_end_label) = 1;
emit_barrier ();
emit_label (div_label);
- ret = emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__divsi3"),
+ ret = emit_library_call_value (gen_rtx_SYMBOL_REF (Pmode, "__divsi3"),
operands[0], LCT_NORMAL,
GET_MODE (operands[0]),
operands[1], GET_MODE (operands[1]),
operands[2], GET_MODE (operands[2]));
if (ret != operands[0])
- emit_move_insn (operands[0], ret);
+ emit_move_insn (operands[0], ret);
emit_label (div_end_label);
emit_insn (gen_blockage ());
@@ -4014,7 +4014,7 @@ microblaze_starting_frame_offset (void)
#define TARGET_LEGITIMIZE_ADDRESS microblaze_legitimize_address
#undef TARGET_LEGITIMATE_ADDRESS_P
-#define TARGET_LEGITIMATE_ADDRESS_P microblaze_legitimate_address_p
+#define TARGET_LEGITIMATE_ADDRESS_P microblaze_legitimate_address_p
#undef TARGET_FRAME_POINTER_REQUIRED
#define TARGET_FRAME_POINTER_REQUIRED microblaze_frame_pointer_required
@@ -4029,7 +4029,7 @@ microblaze_starting_frame_offset (void)
#define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
#undef TARGET_FUNCTION_VALUE
-#define TARGET_FUNCTION_VALUE microblaze_function_value
+#define TARGET_FUNCTION_VALUE microblaze_function_value
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD microblaze_secondary_reload
@@ -4047,7 +4047,7 @@ microblaze_starting_frame_offset (void)
#define TARGET_ASM_INIT_SECTIONS microblaze_elf_asm_init_sections
#undef TARGET_OPTION_OVERRIDE
-#define TARGET_OPTION_OVERRIDE microblaze_option_override
+#define TARGET_OPTION_OVERRIDE microblaze_option_override
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P microblaze_legitimate_constant_p
diff --git a/gcc/config/microblaze/microblaze.h b/gcc/config/microblaze/microblaze.h
index 5d28abf..a56b9de 100644
--- a/gcc/config/microblaze/microblaze.h
+++ b/gcc/config/microblaze/microblaze.h
@@ -68,8 +68,8 @@ extern enum pipeline_type microblaze_pipe;
/* The default is to not need GOT for TLS. */
#define TLS_NEEDS_GOT 0
-/* What is the default setting for -mcpu= . We set it to v4.00.a even though
- we are actually ahead. This is safest version that has generate code
+/* What is the default setting for -mcpu= . We set it to v4.00.a even though
+ we are actually ahead. This is safest version that has generate code
compatible for the original ISA */
#define MICROBLAZE_DEFAULT_CPU "v4.00.a"
@@ -142,7 +142,7 @@ extern enum pipeline_type microblaze_pipe;
#define MB_ABI_SUB_RETURN_ADDR_REGNUM 15
#define MB_ABI_DEBUG_RETURN_ADDR_REGNUM 16
#define MB_ABI_EXCEPTION_RETURN_ADDR_REGNUM 17
-#define MB_ABI_ASM_TEMP_REGNUM 18
+#define MB_ABI_ASM_TEMP_REGNUM 18
/* This is our temp register. */
#define MB_ABI_FRAME_POINTER_REGNUM 19
#define MB_ABI_PIC_ADDR_REGNUM 20
@@ -157,7 +157,7 @@ extern enum pipeline_type microblaze_pipe;
#define MB_ABI_STATIC_CHAIN_REGNUM 3
#define MB_ABI_TEMP1_REGNUM 11
#define MB_ABI_TEMP2_REGNUM 12
-#define MB_ABI_MSR_SAVE_REG 11
+#define MB_ABI_MSR_SAVE_REG 11
/* Volatile register used to save MSR in interrupt handlers. */
@@ -177,8 +177,8 @@ extern enum pipeline_type microblaze_pipe;
(GP_REG_FIRST + MB_ABI_SUB_RETURN_ADDR_REGNUM)
/* Initial state of return address on entry to func = R15.
- Actually, the RA is at R15+8, but gcc doesn't know how
- to generate this.
+ Actually, the RA is at R15+8, but gcc doesn't know how
+ to generate this.
NOTE: GDB has a workaround and expects this incorrect value.
If this is fixed, a corresponding fix to GDB is needed. */
#define INCOMING_RETURN_ADDR_RTX \
@@ -294,7 +294,7 @@ extern enum pipeline_type microblaze_pipe;
rMB_ABI_INTR_RETUREN_ADDR_REGNUM is a fixed
register(return address for interrupt), and will not be used for
anything else. */
-
+
#define FRAME_POINTER_REGNUM FRP_REG_NUM
#define HARD_FRAME_POINTER_REGNUM \
(GP_REG_FIRST + MB_ABI_FRAME_POINTER_REGNUM)
@@ -383,7 +383,7 @@ extern enum reg_class microblaze_regno_to_class[];
&& (((VALUE) & 0x0000ffff) != 0 \
|| (((VALUE) & ~2147483647) != 0 \
&& ((VALUE) & ~2147483647) != ~2147483647)))
-
+
#define PREFERRED_RELOAD_CLASS(X,CLASS) \
((CLASS) != ALL_REGS \
? (CLASS) \
@@ -470,7 +470,7 @@ typedef struct microblaze_args
int fp_code; /* Mode of FP arguments */
int num_adjusts; /* number of adjustments made */
/* Adjustments made to args pass in regs. */
- /* ??? The size is doubled to work around a bug in the code that sets the
+ /* ??? The size is doubled to work around a bug in the code that sets the
adjustments in function_arg. */
rtx adjust[MAX_ARGS_IN_REGISTERS * 2];
} CUMULATIVE_ARGS;
@@ -512,7 +512,7 @@ typedef struct microblaze_args
#define MAX_REGS_PER_ADDRESS 2
-/* Identify valid constant addresses. Exclude if PIC addr which
+/* Identify valid constant addresses. Exclude if PIC addr which
needs scratch register. */
#define CONSTANT_ADDRESS_P(X) microblaze_constant_address_p(X)
@@ -608,7 +608,7 @@ typedef struct microblaze_args
/* ASM_OUTPUT_ALIGNED_COMMON and ASM_OUTPUT_ALIGNED_LOCAL
Unfortunately, we still need to set the section explicitly. Somehow,
- our binutils assign .comm and .lcomm variables to the "current" section
+ our binutils assign .comm and .lcomm variables to the "current" section
in the assembly file, rather than where they implicitly belong. We need to
remove this explicit setting in GCC when binutils can understand sections
better. */
@@ -836,11 +836,11 @@ do { \
#undef TARGET_ASM_NAMED_SECTION
#define TARGET_ASM_NAMED_SECTION default_elf_asm_named_section
-/* Define the strings to put out for each section in the object file.
-
- Note: For ctors/dtors, we want to give these sections the SHF_WRITE
- attribute to allow shared libraries to patch/resolve addresses into
- these locations. On Microblaze, there is no concept of shared libraries
+/* Define the strings to put out for each section in the object file.
+
+ Note: For ctors/dtors, we want to give these sections the SHF_WRITE
+ attribute to allow shared libraries to patch/resolve addresses into
+ these locations. On Microblaze, there is no concept of shared libraries
yet, so this is for future use. */
#define TEXT_SECTION_ASM_OP "\t.text"
#define DATA_SECTION_ASM_OP "\t.data"
@@ -865,7 +865,7 @@ do { \
"\tbrlid r15, " #FUNC "\n\t nop\n" \
TEXT_SECTION_ASM_OP);
-/* We need to group -lm as well, since some Newlib math functions
+/* We need to group -lm as well, since some Newlib math functions
reference __errno! */
#undef LIB_SPEC
#define LIB_SPEC \
diff --git a/gcc/config/mingw/winnt-cxx.cc b/gcc/config/mingw/winnt-cxx.cc
index f4d7a50..0be8095 100644
--- a/gcc/config/mingw/winnt-cxx.cc
+++ b/gcc/config/mingw/winnt-cxx.cc
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -45,8 +46,8 @@ i386_pe_type_dllimport_p (tree decl)
|| DECL_TEMPLATE_INSTANTIATION (decl)
|| DECL_ARTIFICIAL (decl)))
return false;
-
- /* Overrides of the class dllimport decls by out-of-class definitions are
+
+ /* Overrides of the class dllimport decls by out-of-class definitions are
handled by tree.cc:merge_dllimport_decl_attributes. */
return true;
}
@@ -73,16 +74,16 @@ i386_pe_type_dllexport_p (tree decl)
return true;
}
-static inline void maybe_add_dllimport (tree decl)
+static inline void maybe_add_dllimport (tree decl)
{
if (i386_pe_type_dllimport_p (decl))
DECL_DLLIMPORT_P (decl) = 1;
}
-static inline void maybe_add_dllexport (tree decl)
+static inline void maybe_add_dllexport (tree decl)
{
if (i386_pe_type_dllexport_p (decl))
- {
+ {
tree decl_attrs = DECL_ATTRIBUTES (decl);
if (lookup_attribute ("dllexport", decl_attrs) != NULL_TREE)
/* Already done. */
@@ -98,8 +99,8 @@ i386_pe_adjust_class_at_definition (tree t)
tree member;
gcc_assert (CLASS_TYPE_P (t));
-
-
+
+
if (lookup_attribute ("dllexport", TYPE_ATTRIBUTES (t)) != NULL_TREE)
{
tree tmv = TYPE_MAIN_VARIANT (t);
@@ -124,7 +125,7 @@ i386_pe_adjust_class_at_definition (tree t)
{
tree thunk;
maybe_add_dllexport (member);
-
+
/* Also add the attribute to its thunks. */
for (thunk = DECL_THUNKS (member); thunk;
thunk = TREE_CHAIN (thunk))
@@ -156,13 +157,13 @@ i386_pe_adjust_class_at_definition (tree t)
{
tree thunk;
maybe_add_dllimport (member);
-
+
/* Also add the attribute to its thunks. */
for (thunk = DECL_THUNKS (member); thunk;
thunk = DECL_CHAIN (thunk))
maybe_add_dllimport (thunk);
}
-
+
/* Check vtables */
for (member = CLASSTYPE_VTABLES (t);
member; member = DECL_CHAIN (member))
@@ -172,6 +173,6 @@ i386_pe_adjust_class_at_definition (tree t)
/* We leave typeinfo tables alone. We can't mark TI objects as
dllimport, since the address of a secondary VTT may be needed
for static initialization of a primary VTT. VTT's of
- dllimport'd classes should always be link-once COMDAT. */
+ dllimport'd classes should always be link-once COMDAT. */
}
}
diff --git a/gcc/config/mingw/winnt.cc b/gcc/config/mingw/winnt.cc
index 803e5f5..9d433da 100644
--- a/gcc/config/mingw/winnt.cc
+++ b/gcc/config/mingw/winnt.cc
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -120,7 +121,7 @@ i386_pe_determine_dllexport_p (tree decl)
if (TREE_CODE (decl) == FUNCTION_DECL
&& DECL_DECLARED_INLINE_P (decl)
&& !flag_keep_inline_dllexport)
- return false;
+ return false;
if (lookup_attribute ("dllexport", DECL_ATTRIBUTES (decl)))
return true;
@@ -185,11 +186,11 @@ gen_stdcall_or_fastcall_suffix (tree decl, tree id, bool fastcall)
tree arg;
function_args_iterator args_iter;
- gcc_assert (TREE_CODE (decl) == FUNCTION_DECL);
+ gcc_assert (TREE_CODE (decl) == FUNCTION_DECL);
if (prototype_p (type))
{
- /* This attribute is ignored for variadic functions. */
+ /* This attribute is ignored for variadic functions. */
if (stdarg_p (type))
return NULL_TREE;
@@ -235,7 +236,7 @@ i386_pe_maybe_mangle_decl_assembler_name (tree decl, tree id)
tree new_id = NULL_TREE;
if (TREE_CODE (decl) == FUNCTION_DECL)
- {
+ {
unsigned int ccvt = ix86_get_callcvt (TREE_TYPE (decl));
if ((ccvt & IX86_CALLCVT_STDCALL) != 0)
{
@@ -279,7 +280,7 @@ i386_pe_assemble_visibility (tree decl, int)
tree
i386_pe_mangle_decl_assembler_name (tree decl, tree id)
{
- tree new_id = i386_pe_maybe_mangle_decl_assembler_name (decl, id);
+ tree new_id = i386_pe_maybe_mangle_decl_assembler_name (decl, id);
return (new_id ? new_id : id);
}
@@ -335,7 +336,7 @@ mingw_pe_encode_section_info (tree decl, rtx rtl, int first)
flags |= SYMBOL_FLAG_DLLEXPORT;
else if (i386_pe_determine_dllimport_p (decl))
flags |= SYMBOL_FLAG_DLLIMPORT;
-
+
SYMBOL_REF_FLAGS (symbol) = flags;
}
@@ -367,7 +368,7 @@ i386_pe_binds_local_p (const_tree exp)
&& DECL_DECLARED_INLINE_P (exp))
return false;
#endif
-
+
return default_binds_local_p_1 (exp, 0);
}
@@ -495,7 +496,7 @@ mingw_pe_asm_named_section (const char *name, unsigned int flags,
*f++ ='d'; /* This is necessary for older versions of gas. */
*f++ ='r';
}
- else
+ else
{
if (flags & SECTION_CODE)
*f++ = 'x';
@@ -527,7 +528,7 @@ mingw_pe_asm_named_section (const char *name, unsigned int flags,
Instead, have the linker pick one, without warning.
If 'selectany' attribute has been specified, MS compiler
sets 'discard' characteristic, rather than telling linker
- to warn of size or content mismatch, so do the same. */
+ to warn of size or content mismatch, so do the same. */
bool discard = (flags & SECTION_CODE)
|| (TREE_CODE (decl) != IDENTIFIER_NODE
&& lookup_attribute ("selectany",
@@ -555,7 +556,7 @@ i386_pe_asm_output_aligned_decl_common (FILE *stream, tree decl,
rounded += (BIGGEST_ALIGNMENT / BITS_PER_UNIT) - 1;
rounded = (rounded / (BIGGEST_ALIGNMENT / BITS_PER_UNIT)
* (BIGGEST_ALIGNMENT / BITS_PER_UNIT));
-
+
mingw_pe_maybe_record_exported_symbol (decl, name, 1);
fprintf (stream, "\t.comm\t");
diff --git a/gcc/config/mips/frame-header-opt.cc b/gcc/config/mips/frame-header-opt.cc
index 1e7260e..70abd19 100644
--- a/gcc/config/mips/frame-header-opt.cc
+++ b/gcc/config/mips/frame-header-opt.cc
@@ -207,7 +207,7 @@ callees_functions_use_frame_header (function *fn)
{
called_fn = DECL_STRUCT_FUNCTION (called_fn_tree);
if (called_fn == NULL
- || DECL_WEAK (called_fn_tree)
+ || DECL_WEAK (called_fn_tree)
|| has_inlined_assembly (called_fn)
|| !is_leaf_function (called_fn)
|| !called_fn->machine->does_not_use_frame_header)
diff --git a/gcc/config/mips/loongson-mmi.md b/gcc/config/mips/loongson-mmi.md
index dd166bf..4d95873 100644
--- a/gcc/config/mips/loongson-mmi.md
+++ b/gcc/config/mips/loongson-mmi.md
@@ -394,7 +394,7 @@
"pmaddhw\t%0,%1,%2"
[(set_attr "type" "fmul")])
-(define_expand "sdot_prodv4hi"
+(define_expand "sdot_prodv2siv4hi"
[(match_operand:V2SI 0 "register_operand" "")
(match_operand:V4HI 1 "register_operand" "")
(match_operand:V4HI 2 "register_operand" "")
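For context on the rename above: the expander keeps its V2SI result and V4HI input operands, only the optab name now spells out both modes. A rough scalar model of the pairwise widening multiply-add it builds on (pmaddhw), with illustrative inputs that are not part of the patch:

#include <stdint.h>
#include <stdio.h>

/* Illustrative scalar model: each 32-bit result lane is the sum of the
   products of two adjacent 16-bit lanes, as pmaddhw computes.  */
static void
dot_prod_v2si_v4hi (int32_t out[2], const int16_t a[4], const int16_t b[4])
{
  for (int i = 0; i < 2; i++)
    out[i] = (int32_t) a[2 * i] * b[2 * i]
	     + (int32_t) a[2 * i + 1] * b[2 * i + 1];
}

int
main (void)
{
  int16_t a[4] = { 1, 2, 3, 4 };
  int16_t b[4] = { 5, 6, 7, 8 };
  int32_t out[2];
  dot_prod_v2si_v4hi (out, a, b);
  printf ("%d %d\n", out[0], out[1]);	/* Prints "17 53".  */
  return 0;
}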
diff --git a/gcc/config/mips/mips-msa.md b/gcc/config/mips/mips-msa.md
index 377c63f..976f296 100644
--- a/gcc/config/mips/mips-msa.md
+++ b/gcc/config/mips/mips-msa.md
@@ -125,6 +125,9 @@
;; Only floating-point modes.
(define_mode_iterator FMSA [V2DF V4SF])
+;; Only used for reduce_plus_scal: V4SI, V8HI, V16QI have HADD.
+(define_mode_iterator MSA_NO_HADD [V2DF V4SF V2DI])
+
;; The attribute gives the integer vector mode with same size.
(define_mode_attr VIMODE
[(V2DF "V2DI")
@@ -2802,3 +2805,128 @@
(set_attr "mode" "TI")
(set_attr "compact_form" "never")
(set_attr "branch_likely" "no")])
+
+
+;; Vector reduction operation
+(define_expand "reduc_smin_scal_<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:MSA 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ mips_expand_msa_reduc (gen_smin<mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_smax_scal_<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:MSA 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ mips_expand_msa_reduc (gen_smax<mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_umin_scal_<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:IMSA 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ mips_expand_msa_reduc (gen_umin<mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_umax_scal_<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:IMSA 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ mips_expand_msa_reduc (gen_umax<mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_plus_scal_<mode>"
+ [(match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:MSA_NO_HADD 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ mips_expand_msa_reduc (gen_add<mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_plus_scal_v4si"
+ [(match_operand:SI 0 "register_operand")
+ (match_operand:V4SI 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx tmp = gen_reg_rtx (SImode);
+ rtx tmp1 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_msa_hadd_s_d (tmp1, operands[1], operands[1]));
+ emit_insn (gen_vec_extractv4sisi (operands[0], gen_lowpart (V4SImode, tmp1),
+ const0_rtx));
+ emit_insn (gen_vec_extractv4sisi (tmp, gen_lowpart (V4SImode, tmp1),
+ GEN_INT (2)));
+ emit_insn (gen_addsi3 (operands[0], operands[0], tmp));
+ DONE;
+})
+
+(define_expand "reduc_plus_scal_v8hi"
+ [(match_operand:HI 0 "register_operand")
+ (match_operand:V8HI 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx tmp1 = gen_reg_rtx (V4SImode);
+ rtx tmp2 = gen_reg_rtx (V2DImode);
+ rtx tmp3 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_msa_hadd_s_w (tmp1, operands[1], operands[1]));
+ emit_insn (gen_msa_hadd_s_d (tmp2, tmp1, tmp1));
+ mips_expand_msa_reduc (gen_addv2di3, tmp3, tmp2);
+ emit_insn (gen_vec_extractv8hihi (operands[0], gen_lowpart (V8HImode, tmp3),
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_plus_scal_v16qi"
+ [(match_operand:QI 0 "register_operand")
+ (match_operand:V16QI 1 "register_operand")]
+ "ISA_HAS_MSA"
+{
+ rtx tmp1 = gen_reg_rtx (V8HImode);
+ rtx tmp2 = gen_reg_rtx (V4SImode);
+ rtx tmp3 = gen_reg_rtx (V2DImode);
+ rtx tmp4 = gen_reg_rtx (V2DImode);
+ emit_insn (gen_msa_hadd_s_h (tmp1, operands[1], operands[1]));
+ emit_insn (gen_msa_hadd_s_w (tmp2, tmp1, tmp1));
+ emit_insn (gen_msa_hadd_s_d (tmp3, tmp2, tmp2));
+ mips_expand_msa_reduc (gen_addv2di3, tmp4, tmp3);
+ emit_insn (gen_vec_extractv16qiqi (operands[0], gen_lowpart (V16QImode, tmp4),
+ const0_rtx));
+ DONE;
+})
+
+(define_expand "reduc_<optab>_scal_<mode>"
+ [(any_bitwise:<UNITMODE>
+ (match_operand:<UNITMODE> 0 "register_operand")
+ (match_operand:IMSA 1 "register_operand"))]
+ "ISA_HAS_MSA"
+{
+ rtx tmp = gen_reg_rtx (<MODE>mode);
+ mips_expand_msa_reduc (gen_<optab><mode>3, tmp, operands[1]);
+ emit_insn (gen_vec_extract<mode><unitmode> (operands[0], tmp,
+ const0_rtx));
+ DONE;
+})
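A minimal scalar sketch of what the new reduc_*_scal_<mode> expanders are asked to compute: fold every lane of the input vector into a single element with the named operation. The four-lane layout and the choice of smin and plus below are illustrative assumptions, not code from this patch:

#include <stdio.h>

#define NELT 4	/* Illustrative lane count.  */

/* Scalar model of reduc_smin_scal: the smallest lane of the vector.  */
static int
reduc_smin_scal (const int v[NELT])
{
  int r = v[0];
  for (int i = 1; i < NELT; i++)
    if (v[i] < r)
      r = v[i];
  return r;
}

/* Scalar model of reduc_plus_scal: the sum of all lanes.  */
static int
reduc_plus_scal (const int v[NELT])
{
  int r = 0;
  for (int i = 0; i < NELT; i++)
    r += v[i];
  return r;
}

int
main (void)
{
  int v[NELT] = { 7, -3, 9, 2 };
  printf ("smin=%d plus=%d\n", reduc_smin_scal (v), reduc_plus_scal (v));
  return 0;
}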
diff --git a/gcc/config/mips/mips-protos.h b/gcc/config/mips/mips-protos.h
index 90b4c87..96e084e 100644
--- a/gcc/config/mips/mips-protos.h
+++ b/gcc/config/mips/mips-protos.h
@@ -352,6 +352,7 @@ extern void mips_expand_atomic_qihi (union mips_gen_fn_ptrs,
extern void mips_expand_vector_init (rtx, rtx);
extern void mips_expand_vec_unpack (rtx op[2], bool, bool);
extern void mips_expand_vec_reduc (rtx, rtx, rtx (*)(rtx, rtx, rtx));
+extern void mips_expand_msa_reduc (rtx (*)(rtx, rtx, rtx), rtx, rtx);
extern void mips_expand_vec_minmax (rtx, rtx, rtx,
rtx (*) (rtx, rtx, rtx), bool);
diff --git a/gcc/config/mips/mips.cc b/gcc/config/mips/mips.cc
index 6c797b6..3927553 100644
--- a/gcc/config/mips/mips.cc
+++ b/gcc/config/mips/mips.cc
@@ -23,6 +23,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -2803,7 +2804,7 @@ mips_lwxs_address_p (rtx addr)
return false;
}
-/* Return true if ADDR matches the pattern for the L{B,H,W,D}{,U}X load
+/* Return true if ADDR matches the pattern for the L{B,H,W,D}{,U}X load
indexed address instruction. Note that such addresses are
not considered legitimate in the TARGET_LEGITIMATE_ADDRESS_P
sense, because their use is so restricted. */
@@ -4454,7 +4455,7 @@ mips_rtx_costs (rtx x, machine_mode mode, int outer_code,
+ set_src_cost (XEXP (XEXP (x, 1), 0), mode, speed));
return true;
}
-
+
/* Fall through. */
case IOR:
@@ -12545,7 +12546,7 @@ mips_output_probe_stack_range (rtx reg1, rtx reg2)
/* Probe at TEST_ADDR, test if TEST_ADDR == LAST_ADDR and branch. */
xops[1] = reg2;
strcpy (tmp, "%(%<bne\t%0,%1,");
- output_asm_insn (strcat (tmp, &loop_lab[1]), xops);
+ output_asm_insn (strcat (tmp, &loop_lab[1]), xops);
if (TARGET_64BIT)
output_asm_insn ("sd\t$0,0(%0)%)", xops);
else
@@ -13729,7 +13730,7 @@ mips_memory_move_cost (machine_mode mode, reg_class_t rclass, bool in)
{
return (mips_cost->memory_latency
+ memory_move_secondary_cost (mode, rclass, in));
-}
+}
/* Implement TARGET_SECONDARY_MEMORY_NEEDED.
@@ -14997,7 +14998,7 @@ bool
mips_fmadd_bypass (rtx_insn *out_insn, rtx_insn *in_insn)
{
int dst_reg, src_reg;
-
+
gcc_assert (get_attr_type (in_insn) == TYPE_FMADD);
gcc_assert (get_attr_type (out_insn) == TYPE_FMADD);
@@ -22239,6 +22240,47 @@ mips_vectorize_vec_perm_const (machine_mode vmode, machine_mode op_mode,
return ok;
}
+/* Expand a vector reduction. FN is the binary pattern to reduce;
+ DEST is the destination; IN is the input vector. */
+
+void
+mips_expand_msa_reduc (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
+{
+ rtx swap, vec = in;
+ machine_mode mode = GET_MODE (in);
+ unsigned int i, gelt;
+ const unsigned nelt = GET_MODE_BITSIZE (mode) / GET_MODE_UNIT_BITSIZE (mode);
+ unsigned char perm[MAX_VECT_LEN];
+
+ /* We have no SHF.d. */
+ if (nelt == 2)
+ {
+ perm[0] = 2;
+ perm[1] = 3;
+ perm[2] = 0;
+ perm[3] = 1;
+ rtx rsi = simplify_gen_subreg (V4SImode, in, mode, 0);
+ swap = gen_reg_rtx (V4SImode);
+ mips_expand_vselect (swap, rsi, perm, 4);
+ emit_move_insn (dest, gen_rtx_SUBREG (mode, swap, 0));
+ emit_insn (fn (dest, dest, vec));
+ return;
+ }
+
+ for (gelt=1; gelt<=nelt/2; gelt *= 2)
+ {
+ for (i = 0; i<nelt; i++)
+ perm[i] = ((i/gelt)%2) ? (i-gelt) : (i+gelt);
+ if (gelt == nelt/2)
+ swap = dest;
+ else
+ swap = gen_reg_rtx (mode);
+ mips_expand_vselect (swap, vec, perm, nelt);
+ emit_insn (fn (swap, swap, vec));
+ vec = swap;
+ }
+}
+
/* Implement TARGET_SCHED_REASSOCIATION_WIDTH. */
static int
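mips_expand_msa_reduc above implements the usual log2-step reduction: each pass pairs the vector with a copy whose lanes are swapped in groups of GELT and combines them, so after log2(nelt) passes every lane holds the full result and lane 0 can be extracted. A small C model of that loop, assuming eight lanes and addition as the combining function:

#include <stdio.h>

#define NELT 8	/* Illustrative lane count (e.g. V8HI).  */

/* One reduction step: element I is paired with its partner GELT lanes
   away, alternating direction per group of GELT, then combined with the
   original vector (addition stands in for FN here).  */
static void
reduce_step (int v[NELT], unsigned gelt)
{
  int swap[NELT];
  for (unsigned i = 0; i < NELT; i++)
    swap[i] = ((i / gelt) % 2) ? v[i - gelt] : v[i + gelt];
  for (unsigned i = 0; i < NELT; i++)
    v[i] += swap[i];
}

int
main (void)
{
  int v[NELT] = { 1, 2, 3, 4, 5, 6, 7, 8 };
  for (unsigned gelt = 1; gelt <= NELT / 2; gelt *= 2)
    reduce_step (v, gelt);
  printf ("lane 0 = %d\n", v[0]);	/* 36: every lane holds the sum.  */
  return 0;
}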
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 84dd64d..fb696ed 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -1759,7 +1759,7 @@ FP_ASM_SPEC "\
optimised to use word loads. */
#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
DATA_ALIGNMENT (TYPE, ALIGN)
-
+
#define PAD_VARARGS_DOWN \
(targetm.calls.function_arg_padding (TYPE_MODE (type), type) == PAD_DOWNWARD)
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 737d256..f147667 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -993,6 +993,10 @@
;; from the same template.
(define_code_iterator any_shift [ashift ashiftrt lshiftrt])
+;; This code iterator allows the three bitwise instructions to be generated
+;; from the same template.
+(define_code_iterator any_bitwise [and ior xor])
+
;; This code iterator allows unsigned and signed division to be generated
;; from the same template.
(define_code_iterator any_div [div udiv])
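The any_bitwise iterator lets the single reduc_<optab>_scal_<mode> template in mips-msa.md above cover the and, inclusive-or and exclusive-or reductions. A hedged scalar model of those three operations, with an assumed four-lane vector:

#include <stdint.h>
#include <stdio.h>

#define NELT 4	/* Illustrative lane count.  */

/* AND of all lanes.  */
static uint32_t
reduc_and (const uint32_t v[NELT])
{
  uint32_t r = v[0];
  for (int i = 1; i < NELT; i++)
    r &= v[i];
  return r;
}

/* Inclusive OR of all lanes.  */
static uint32_t
reduc_ior (const uint32_t v[NELT])
{
  uint32_t r = v[0];
  for (int i = 1; i < NELT; i++)
    r |= v[i];
  return r;
}

/* Exclusive OR of all lanes.  */
static uint32_t
reduc_xor (const uint32_t v[NELT])
{
  uint32_t r = v[0];
  for (int i = 1; i < NELT; i++)
    r ^= v[i];
  return r;
}

int
main (void)
{
  uint32_t v[NELT] = { 0xf0, 0x3c, 0x0f, 0xff };
  printf ("%x %x %x\n", reduc_and (v), reduc_ior (v), reduc_xor (v));
  return 0;
}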
diff --git a/gcc/config/mips/sde.h b/gcc/config/mips/sde.h
index 35ca431..d177b08f 100644
--- a/gcc/config/mips/sde.h
+++ b/gcc/config/mips/sde.h
@@ -45,7 +45,7 @@ along with GCC; see the file COPYING3. If not see
"%{!EB:%{!EL:%(endian_spec)}}", \
\
/* Configuration-independent MIPS rules. */ \
- BASE_DRIVER_SELF_SPECS
+ BASE_DRIVER_SELF_SPECS
/* Use trap rather than break for all but MIPS I ISA. Force -no-mips16,
so that MIPS16 assembler code requires an explicit ".set mips16".
diff --git a/gcc/config/mmix/mmix.cc b/gcc/config/mmix/mmix.cc
index 167aea7..ce01438 100644
--- a/gcc/config/mmix/mmix.cc
+++ b/gcc/config/mmix/mmix.cc
@@ -761,7 +761,7 @@ mmix_function_value (const_tree valtype,
if (!outgoing)
return gen_rtx_REG (mode, MMIX_RETURN_VALUE_REGNUM);
-
+
/* Return values that fit in a register need no special handling.
There's no register hole when parameters are passed in global
registers. */
diff --git a/gcc/config/mn10300/linux.h b/gcc/config/mn10300/linux.h
index 8cfe0e1..0e51e95 100644
--- a/gcc/config/mn10300/linux.h
+++ b/gcc/config/mn10300/linux.h
@@ -18,7 +18,7 @@
You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-
+
#undef PREFERRED_DEBUGGING_TYPE
#define PREFERRED_DEBUGGING_TYPE DWARF2_DEBUG
@@ -77,7 +77,7 @@ extern int mn10300_protect_label;
asm_fprintf (FILE, "+"); \
asm_fprintf (FILE, "%U%s", real_name); \
} \
- while (0)
+ while (0)
#undef SIZE_TYPE
#undef PTRDIFF_TYPE
diff --git a/gcc/config/mn10300/mn10300.cc b/gcc/config/mn10300/mn10300.cc
index 1cf0811..fab4641 100644
--- a/gcc/config/mn10300/mn10300.cc
+++ b/gcc/config/mn10300/mn10300.cc
@@ -475,7 +475,7 @@ mn10300_print_operand_address (FILE *file, rtx addr)
{
rtx base = XEXP (addr, 0);
rtx index = XEXP (addr, 1);
-
+
if (REG_P (index) && !REG_OK_FOR_INDEX_P (index))
{
rtx x = base;
@@ -651,7 +651,7 @@ mn10300_get_live_callee_saved_regs (unsigned int * bytes_saved)
for (i = 0x04000; i < 0x40000; i <<= 1)
if ((mask & i) == 0)
++ count;
-
+
mask |= 0x3c000;
}
@@ -748,7 +748,7 @@ static inline unsigned int
popcount (unsigned int mask)
{
unsigned int count = 0;
-
+
while (mask)
{
++ count;
@@ -1333,7 +1333,7 @@ mn10300_preferred_reload_class (rtx x, reg_class_t rclass)
if (x == stack_pointer_rtx && rclass != SP_REGS)
return (TARGET_AM33 ? GENERAL_REGS : ADDRESS_REGS);
else if (MEM_P (x)
- || (REG_P (x)
+ || (REG_P (x)
&& !HARD_REGISTER_P (x))
|| (GET_CODE (x) == SUBREG
&& REG_P (SUBREG_REG (x))
@@ -1706,7 +1706,7 @@ mn10300_output_add (rtx operands[3], bool need_flags)
src2_regnum = true_regnum (src2);
src2_class = REGNO_REG_CLASS (src2_regnum);
-
+
if (dest_regnum == src1_regnum)
return "add %2,%0";
if (dest_regnum == src2_regnum)
@@ -2296,7 +2296,7 @@ mn10300_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
move cost above. This is not a problem. */
static int
-mn10300_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
+mn10300_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
reg_class_t iclass, bool in ATTRIBUTE_UNUSED)
{
enum reg_class rclass = (enum reg_class) iclass;
@@ -2410,7 +2410,7 @@ mn10300_rtx_costs (rtx x, machine_mode mode, int outer_code,
}
}
goto do_arith_costs;
-
+
case MINUS:
case AND:
case IOR:
@@ -2533,7 +2533,7 @@ mn10300_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
0xdc jmp fnaddr
<disp>
- Note that the two extra insns are effectively nops; they
+ Note that the two extra insns are effectively nops; they
clobber the flags but do not affect the contents of D0 or D1. */
disp = expand_binop (SImode, sub_optab, fnaddr,
@@ -2631,7 +2631,7 @@ mn10300_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
|| (TARGET_AM33 && REGNO_REG_CLASS (regno) == ADDRESS_REGS)
|| REGNO_REG_CLASS (regno) == EXTENDED_REGS)
return GET_MODE_SIZE (mode) <= 4;
-
+
return false;
}
@@ -2906,14 +2906,14 @@ mn10300_match_ccmode (rtx insn, machine_mode cc_mode)
}
/* This function is used to help split:
-
+
(set (reg) (and (reg) (int)))
-
+
into:
-
+
(set (reg) (shift (reg) (int))
(set (reg) (shift (reg) (int))
-
+
where the shitfs will be shorter than the "and" insn.
It returns the number of bits that should be shifted. A positive
@@ -3038,7 +3038,7 @@ check_liw_constraints (struct liw_data * pliw1, struct liw_data * pliw2)
check its values prior to any changes made by OP. */
if (pliw1->op == LIW_OP_CMP)
{
- /* Two sequential comparisons means dead code, which ought to
+ /* Two sequential comparisons means dead code, which ought to
have been eliminated given that bundling only happens with
optimization. We cannot bundle them in any case. */
gcc_assert (pliw1->op != pliw2->op);
@@ -3076,7 +3076,7 @@ check_liw_constraints (struct liw_data * pliw1, struct liw_data * pliw2)
|| pliw2->op == LIW_OP_OR
|| pliw2->op == LIW_OP_XOR))
return false;
-
+
pliw2->src = pliw1->src;
return true;
}
@@ -3114,7 +3114,7 @@ mn10300_bundle_liw (void)
if (liw1.slot == LIW_OP2 || liw2.slot == LIW_OP1)
{
struct liw_data temp;
-
+
temp = liw1;
liw1 = liw2;
liw2 = temp;
@@ -3191,7 +3191,7 @@ mn10300_insert_setlb_lcc (rtx_insn *label, rtx_insn *branch)
if (GET_MODE (cmp_reg) == CC_FLOATmode)
lcc = gen_FLcc (comparison, label);
else
- lcc = gen_Lcc (comparison, label);
+ lcc = gen_Lcc (comparison, label);
rtx_insn *jump = emit_jump_insn_before (lcc, branch);
mark_jump_label (XVECEXP (lcc, 0, 0), jump, 0);
@@ -3294,7 +3294,7 @@ mn10300_scan_for_setlb_lcc (void)
loop_optimizer_finalize ();
- df_finish_pass (false);
+ df_finish_pass (false);
DUMP ("SETLB scan complete", NULL_RTX);
}
diff --git a/gcc/config/moxie/moxie.cc b/gcc/config/moxie/moxie.cc
index 47a14ea..eda7b08 100644
--- a/gcc/config/moxie/moxie.cc
+++ b/gcc/config/moxie/moxie.cc
@@ -63,12 +63,12 @@ moxie_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
/* Define how to find the value returned by a function.
VALTYPE is the data type of the value (as a tree).
If the precise function being called is known, FUNC is its
- FUNCTION_DECL; otherwise, FUNC is 0.
+ FUNCTION_DECL; otherwise, FUNC is 0.
We always return values in register $r0 for moxie. */
static rtx
-moxie_function_value (const_tree valtype,
+moxie_function_value (const_tree valtype,
const_tree fntype_or_decl ATTRIBUTE_UNUSED,
bool outgoing ATTRIBUTE_UNUSED)
{
@@ -118,12 +118,12 @@ moxie_print_operand_address (FILE *file, machine_mode, rtx x)
case REG:
fprintf (file, "(%s)", reg_names[REGNO (x)]);
break;
-
+
case PLUS:
switch (GET_CODE (XEXP (x, 1)))
{
case CONST_INT:
- fprintf (file, "%ld(%s)",
+ fprintf (file, "%ld(%s)",
INTVAL(XEXP (x, 1)), reg_names[REGNO (XEXP (x, 0))]);
break;
case SYMBOL_REF:
@@ -133,7 +133,7 @@ moxie_print_operand_address (FILE *file, machine_mode, rtx x)
case CONST:
{
rtx plus = XEXP (XEXP (x, 1), 0);
- if (GET_CODE (XEXP (plus, 0)) == SYMBOL_REF
+ if (GET_CODE (XEXP (plus, 0)) == SYMBOL_REF
&& CONST_INT_P (XEXP (plus, 1)))
{
output_addr_const(file, XEXP (plus, 0));
@@ -234,7 +234,7 @@ moxie_option_override (void)
/* Set the per-function-data initializer. */
init_machine_status = moxie_init_machine_status;
-#ifdef TARGET_MOXIEBOX
+#ifdef TARGET_MOXIEBOX
target_flags |= MASK_HAS_MULX;
#endif
}
@@ -267,9 +267,9 @@ moxie_compute_frame (void)
if (df_regs_ever_live_p (regno) && (! call_used_or_fixed_reg_p (regno)))
cfun->machine->callee_saved_reg_size += 4;
- cfun->machine->size_for_adjusting_sp =
+ cfun->machine->size_for_adjusting_sp =
crtl->args.pretend_args_size
- + cfun->machine->local_vars_size
+ + cfun->machine->local_vars_size
+ (ACCUMULATE_OUTGOING_ARGS
? (HOST_WIDE_INT) crtl->outgoing_args_size : 0);
}
@@ -298,19 +298,19 @@ moxie_expand_prologue (void)
if (cfun->machine->size_for_adjusting_sp > 0)
{
- int i = cfun->machine->size_for_adjusting_sp;
+ int i = cfun->machine->size_for_adjusting_sp;
while ((i >= 255) && (i <= 510))
{
- insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
- stack_pointer_rtx,
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
GEN_INT (255)));
RTX_FRAME_RELATED_P (insn) = 1;
i -= 255;
}
if (i <= 255)
{
- insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
- stack_pointer_rtx,
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
GEN_INT (i)));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -319,8 +319,8 @@ moxie_expand_prologue (void)
rtx reg = gen_rtx_REG (SImode, MOXIE_R12);
insn = emit_move_insn (reg, GEN_INT (i));
RTX_FRAME_RELATED_P (insn) = 1;
- insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
- stack_pointer_rtx,
+ insn = emit_insn (gen_subsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
reg));
RTX_FRAME_RELATED_P (insn) = 1;
}
@@ -339,8 +339,8 @@ moxie_expand_epilogue (void)
if (cfun->machine->callee_saved_reg_size <= 255)
{
emit_move_insn (reg, hard_frame_pointer_rtx);
- emit_insn (gen_subsi3
- (reg, reg,
+ emit_insn (gen_subsi3
+ (reg, reg,
GEN_INT (cfun->machine->callee_saved_reg_size)));
}
else
@@ -367,7 +367,7 @@ int
moxie_initial_elimination_offset (int from, int to)
{
int ret;
-
+
if ((from) == FRAME_POINTER_REGNUM && (to) == HARD_FRAME_POINTER_REGNUM)
{
/* Compute this since we need to use cfun->machine->local_vars_size. */
@@ -392,19 +392,19 @@ moxie_setup_incoming_varargs (cumulative_args_t cum_v,
CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
int regno;
int regs = 8 - *cum;
-
+
*pretend_size = regs < 0 ? 0 : GET_MODE_SIZE (SImode) * regs;
-
+
if (no_rtl)
return;
-
+
for (regno = *cum; regno < 8; regno++)
{
rtx reg = gen_rtx_REG (SImode, regno);
rtx slot = gen_rtx_PLUS (Pmode,
gen_rtx_REG (SImode, ARG_POINTER_REGNUM),
GEN_INT (UNITS_PER_WORD * (3 + (regno-2))));
-
+
emit_move_insn (gen_rtx_MEM (SImode, slot), reg);
}
}
@@ -430,7 +430,7 @@ moxie_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
if (*cum < 8)
return gen_rtx_REG (arg.mode, *cum);
- else
+ else
return NULL_RTX;
}
@@ -567,7 +567,7 @@ moxie_reg_ok_for_base_p (const_rtx reg, bool strict_p)
if (strict_p)
return HARD_REGNO_OK_FOR_BASE_P (regno)
|| HARD_REGNO_OK_FOR_BASE_P (reg_renumber[regno]);
- else
+ else
return !HARD_REGISTER_NUM_P (regno)
|| HARD_REGNO_OK_FOR_BASE_P (regno);
}
diff --git a/gcc/config/moxie/moxie.h b/gcc/config/moxie/moxie.h
index 4857c92..b2977e5 100644
--- a/gcc/config/moxie/moxie.h
+++ b/gcc/config/moxie/moxie.h
@@ -91,7 +91,7 @@
Special Registers...
$pc - 32-bit program counter.
-
+
*/
#define REGISTER_NAMES { \
@@ -104,7 +104,7 @@
#define MOXIE_FP 0
#define MOXIE_SP 1
#define MOXIE_R0 2
-#define MOXIE_R1 3
+#define MOXIE_R1 3
#define MOXIE_R2 4
#define MOXIE_R3 5
#define MOXIE_R4 6
@@ -209,7 +209,7 @@ enum reg_class
#define ACCUMULATE_OUTGOING_ARGS 1
/* A C statement (sans semicolon) for initializing the variable CUM
- for the state at the beginning of the argument list.
+ for the state at the beginning of the argument list.
For moxie, the first arg is passed in register 2 (aka $r0). */
#define INIT_CUMULATIVE_ARGS(CUM,FNTYPE,LIBNAME,FNDECL,N_NAMED_ARGS) \
(CUM = MOXIE_R0)
@@ -300,7 +300,7 @@ enum reg_class
/* Every structures size must be a multiple of 8 bits. */
#define STRUCTURE_SIZE_BOUNDARY 8
-/* Look at the fundamental type that is used for a bit-field and use
+/* Look at the fundamental type that is used for a bit-field and use
that to impose alignment on the enclosing structure.
struct s {int a:8}; should have same alignment as "int", not "char". */
#define PCC_BITFIELD_TYPE_MATTERS 1
@@ -314,7 +314,7 @@ enum reg_class
(TREE_CODE (TYPE) == ARRAY_TYPE \
&& TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
&& (ALIGN) < FASTEST_ALIGNMENT ? FASTEST_ALIGNMENT : (ALIGN))
-
+
/* Set this nonzero if move instructions will actually fail to work
when given unaligned data. */
#define STRICT_ALIGNMENT 1
@@ -351,7 +351,7 @@ enum reg_class
#define ELIMINABLE_REGS \
{{ FRAME_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }, \
- { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }}
+ { ARG_POINTER_REGNUM, HARD_FRAME_POINTER_REGNUM }}
/* This macro returns the initial difference between the specified pair
of registers. */
diff --git a/gcc/config/msp430/driver-msp430.cc b/gcc/config/msp430/driver-msp430.cc
index a11fd3d..5ffa8ef 100644
--- a/gcc/config/msp430/driver-msp430.cc
+++ b/gcc/config/msp430/driver-msp430.cc
@@ -20,6 +20,7 @@
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/netbsd.h b/gcc/config/netbsd.h
index de72879..fe33f9a 100644
--- a/gcc/config/netbsd.h
+++ b/gcc/config/netbsd.h
@@ -131,7 +131,7 @@ along with GCC; see the file COPYING3. If not see
#undef TARGET_LIBC_HAS_FUNCTION
#define TARGET_LIBC_HAS_FUNCTION no_c99_libc_has_function
-/* When building shared libraries, the initialization and finalization
+/* When building shared libraries, the initialization and finalization
functions for the library are .init and .fini respectively. */
#define COLLECT_SHARED_INIT_FUNC(STREAM,FUNC) \
diff --git a/gcc/config/nios2/elf.h b/gcc/config/nios2/elf.h
index 44664c9..3efcdcf 100644
--- a/gcc/config/nios2/elf.h
+++ b/gcc/config/nios2/elf.h
@@ -1,6 +1,6 @@
/* Definitions of ELF target support for Altera Nios II.
Copyright (C) 2012-2024 Free Software Foundation, Inc.
- Contributed by Jonah Graham (jgraham@altera.com),
+ Contributed by Jonah Graham (jgraham@altera.com),
Will Reece (wreece@altera.com), and Jeff DaSilva (jdasilva@altera.com).
Contributed by Mentor Graphics, Inc.
diff --git a/gcc/config/nios2/nios2.cc b/gcc/config/nios2/nios2.cc
index a981e50..cb33c67 100644
--- a/gcc/config/nios2/nios2.cc
+++ b/gcc/config/nios2/nios2.cc
@@ -1,6 +1,6 @@
/* Target machine subroutines for Altera Nios II.
Copyright (C) 2012-2024 Free Software Foundation, Inc.
- Contributed by Jonah Graham (jgraham@altera.com),
+ Contributed by Jonah Graham (jgraham@altera.com),
Will Reece (wreece@altera.com), and Jeff DaSilva (jdasilva@altera.com).
Contributed by Mentor Graphics, Inc.
@@ -343,7 +343,7 @@ static bool
nios2_fpu_compare_enabled (enum rtx_code cond, machine_mode mode)
{
if (mode == SFmode)
- switch (cond)
+ switch (cond)
{
case EQ: return N2FPU_OP_ENABLED_P (fcmpeqs);
case NE: return N2FPU_OP_ENABLED_P (fcmpnes);
@@ -354,7 +354,7 @@ nios2_fpu_compare_enabled (enum rtx_code cond, machine_mode mode)
default: break;
}
else if (mode == DFmode)
- switch (cond)
+ switch (cond)
{
case EQ: return N2FPU_OP_ENABLED_P (fcmpeqd);
case NE: return N2FPU_OP_ENABLED_P (fcmpned);
@@ -388,7 +388,7 @@ nios2_compute_frame_layout (void)
if (cfun->machine->initialized)
return cfun->machine->total_size;
-
+
/* Calculate space needed for gp registers. */
save_reg_size = 0;
for (regno = 0; regno <= LAST_GP_REG; regno++)
@@ -434,7 +434,7 @@ nios2_compute_frame_layout (void)
{
unsigned i;
unsigned r;
-
+
for (i = 0; (r = EH_RETURN_DATA_REGNO (i)) != INVALID_REGNUM; i++)
if (!(save_mask & (1 << r)))
{
@@ -552,7 +552,7 @@ nios2_create_cfa_notes (rtx_insn *insn, bool epilogue_p)
#define TEMP_REG_NUM 8
/* Emit conditional trap for checking stack limit. SIZE is the number of
- additional bytes required.
+ additional bytes required.
GDB prologue analysis depends on this generating a direct comparison
to the SP register, so the adjustment to add SIZE needs to be done on
@@ -995,7 +995,7 @@ nios2_set_return_address (rtx address, rtx scratch)
{
unsigned offset = cfun->machine->save_reg_size - 4;
rtx base;
-
+
if (frame_pointer_needed)
base = hard_frame_pointer_rtx;
else
@@ -1080,7 +1080,7 @@ static bool
prologue_saved_reg_p (unsigned regno)
{
gcc_assert (GP_REG_P (regno));
-
+
if (df_regs_ever_live_p (regno) && !call_used_or_fixed_reg_p (regno))
return true;
@@ -1135,7 +1135,7 @@ nios2_initial_elimination_offset (int from, int to)
by the offset from the frame pointer to the stack pointer. */
if (to == HARD_FRAME_POINTER_REGNUM)
offset -= (cfun->machine->save_regs_offset
- + cfun->machine->fp_save_offset);
+ + cfun->machine->fp_save_offset);
return offset;
}
@@ -1390,13 +1390,13 @@ nios2_option_override (void)
/* Process -mgprel-sec= and -m0rel-sec=. */
if (nios2_gprel_sec)
{
- if (regcomp (&nios2_gprel_sec_regex, nios2_gprel_sec,
+ if (regcomp (&nios2_gprel_sec_regex, nios2_gprel_sec,
REG_EXTENDED | REG_NOSUB))
error ("%<-mgprel-sec=%> argument is not a valid regular expression");
}
if (nios2_r0rel_sec)
{
- if (regcomp (&nios2_r0rel_sec_regex, nios2_r0rel_sec,
+ if (regcomp (&nios2_r0rel_sec_regex, nios2_r0rel_sec,
REG_EXTENDED | REG_NOSUB))
error ("%<-mr0rel-sec=%> argument is not a valid regular expression");
}
@@ -1533,7 +1533,7 @@ nios2_rtx_costs (rtx x, machine_mode mode,
*total = COSTS_N_INSNS (5); /* Guess? */
else if (speed)
*total = COSTS_N_INSNS (2); /* Latency adjustment. */
- else
+ else
*total = COSTS_N_INSNS (1);
if (TARGET_HAS_MULX && GET_MODE (x) == DImode)
{
@@ -1557,7 +1557,7 @@ nios2_rtx_costs (rtx x, machine_mode mode,
*total = COSTS_N_INSNS (5); /* Guess? */
else if (speed)
*total = COSTS_N_INSNS (2); /* Latency adjustment. */
- else
+ else
*total = COSTS_N_INSNS (1);
return false;
}
@@ -1569,11 +1569,11 @@ nios2_rtx_costs (rtx x, machine_mode mode,
{
if (!speed)
*total = COSTS_N_INSNS (1);
- else
+ else
*total = COSTS_N_INSNS (2); /* Latency adjustment. */
return false;
}
-
+
case ZERO_EXTRACT:
if (TARGET_HAS_BMX)
{
@@ -1639,7 +1639,7 @@ nios2_call_tls_get_addr (rtx ti)
rtx ret = gen_rtx_REG (Pmode, FIRST_RETVAL_REGNO);
rtx fn;
rtx_insn *insn;
-
+
if (!nios2_tls_symbol)
nios2_tls_symbol = init_one_libfunc ("__tls_get_addr");
@@ -2005,7 +2005,7 @@ nios2_validate_compare (machine_mode mode, rtx *cmp, rtx *op1, rtx *op2)
}
else if (!reg_or_0_operand (*op2, mode))
*op2 = force_reg (mode, *op2);
-
+
check_rebuild_cmp:
if (code == GT || code == GTU || code == LE || code == LEU)
{
@@ -2057,7 +2057,7 @@ nios2_symbolic_constant_p (rtx x)
return false;
}
-/* Return true if X is an expression of the form
+/* Return true if X is an expression of the form
(PLUS reg large_constant). */
static bool
nios2_plus_large_constant_p (rtx x)
@@ -2134,7 +2134,7 @@ nios2_valid_addr_expr_p (rtx base, rtx offset, bool strict_p)
&& nios2_regno_ok_for_base_p (REGNO (base), strict_p)
&& (offset == NULL_RTX
|| nios2_valid_addr_offset_p (offset)
- || (nios2_large_constant_allowed ()
+ || (nios2_large_constant_allowed ()
&& nios2_symbolic_constant_p (offset))
|| nios2_unspec_reloc_p (offset)));
}
@@ -2159,7 +2159,7 @@ nios2_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, rtx operand,
/* Else, fall through. */
case LABEL_REF:
- if (nios2_large_constant_allowed ()
+ if (nios2_large_constant_allowed ()
&& nios2_symbolic_constant_p (operand))
return true;
return false;
@@ -2182,7 +2182,7 @@ nios2_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, rtx operand,
rtx op0 = XEXP (operand, 0);
rtx op1 = XEXP (operand, 1);
- if (nios2_valid_addr_expr_p (op0, op1, strict_p)
+ if (nios2_valid_addr_expr_p (op0, op1, strict_p)
|| nios2_valid_addr_expr_p (op1, op0, strict_p))
return true;
}
@@ -2192,7 +2192,7 @@ nios2_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, rtx operand,
This requires a 16-bit relocation and isn't valid with R2
io-variant load/stores. */
case LO_SUM:
- if (TARGET_ARCH_R2
+ if (TARGET_ARCH_R2
&& (TARGET_BYPASS_CACHE || TARGET_BYPASS_CACHE_VOLATILE))
return false;
else
@@ -2216,18 +2216,18 @@ nios2_legitimate_address_p (machine_mode mode ATTRIBUTE_UNUSED, rtx operand,
the (plus reg symbolic_constant) and (plus reg (const ...)) forms
but giving (plus reg symbol_ref) address modes the same cost as those
that don't require splitting. Also, from a theoretical point of view:
- - This is in line with the recommendation in the GCC internals
+ - This is in line with the recommendation in the GCC internals
documentation to make address forms involving multiple
- registers more expensive than single-register forms.
- - OTOH it still encourages fwprop1 to propagate constants into
+ registers more expensive than single-register forms.
+ - OTOH it still encourages fwprop1 to propagate constants into
address expressions more aggressively.
- We should discourage splitting (symbol + offset) into hi/lo pairs
to allow CSE'ing the symbol when it's used with more than one offset,
but not so heavily as to avoid this addressing mode at all. */
static int
-nios2_address_cost (rtx address,
+nios2_address_cost (rtx address,
machine_mode mode ATTRIBUTE_UNUSED,
- addr_space_t as ATTRIBUTE_UNUSED,
+ addr_space_t as ATTRIBUTE_UNUSED,
bool speed ATTRIBUTE_UNUSED)
{
if (nios2_plus_large_constant_p (address))
@@ -2258,7 +2258,7 @@ nios2_large_constant_memory_operand_p (rtx x)
}
-/* Return true if X is something that needs to be split into a
+/* Return true if X is something that needs to be split into a
high/lo_sum pair. */
bool
nios2_large_constant_p (rtx x)
@@ -2269,8 +2269,8 @@ nios2_large_constant_p (rtx x)
}
/* Given an RTX X that satisfies nios2_large_constant_p, split it into
- high and lo_sum parts using TEMP as a scratch register. Emit the high
- instruction and return the lo_sum expression.
+ high and lo_sum parts using TEMP as a scratch register. Emit the high
+ instruction and return the lo_sum expression.
Also handle special cases involving constant integers. */
rtx
nios2_split_large_constant (rtx x, rtx temp)
@@ -2293,7 +2293,7 @@ nios2_split_large_constant (rtx x, rtx temp)
return gen_rtx_PLUS (Pmode, temp, gen_int_mode (low, Pmode));
}
}
-
+
emit_insn (gen_rtx_SET (temp, gen_rtx_HIGH (Pmode, copy_rtx (x))));
return gen_rtx_LO_SUM (Pmode, temp, copy_rtx (x));
}
@@ -2317,7 +2317,7 @@ nios2_split_plus_large_constant (rtx op0, rtx op1)
}
/* Given a MEM OP with an address that includes a splittable symbol or
- other large constant, emit some instructions to do the split and
+ other large constant, emit some instructions to do the split and
return a new MEM. */
rtx
nios2_split_large_constant_memory_operand (rtx op)
@@ -2341,7 +2341,7 @@ nios2_small_section_name_p (const char *section)
|| startswith (section, ".sbss.")
|| strcmp (section, ".sdata") == 0
|| startswith (section, ".sdata.")
- || (nios2_gprel_sec
+ || (nios2_gprel_sec
&& regexec (&nios2_gprel_sec_regex, section, 0, NULL, 0) == 0));
}
@@ -2349,7 +2349,7 @@ nios2_small_section_name_p (const char *section)
static bool
nios2_r0rel_section_name_p (const char *section)
{
- return (nios2_r0rel_sec
+ return (nios2_r0rel_sec
&& regexec (&nios2_r0rel_sec_regex, section, 0, NULL, 0) == 0);
}
@@ -2591,7 +2591,7 @@ nios2_legitimize_constant_address (rtx addr)
base = nios2_legitimize_tls_address (base);
else if (flag_pic)
base = nios2_load_pic_address (base, UNSPEC_PIC_SYM, NULL_RTX);
- else if (!nios2_large_constant_allowed ()
+ else if (!nios2_large_constant_allowed ()
&& nios2_symbolic_constant_p (addr))
return nios2_split_large_constant (addr, gen_reg_rtx (Pmode));
else if (CONST_INT_P (addr))
@@ -2625,7 +2625,7 @@ nios2_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
machine_mode mode ATTRIBUTE_UNUSED)
{
rtx op0, op1;
-
+
if (CONSTANT_P (x))
return nios2_legitimize_constant_address (x);
@@ -2749,15 +2749,15 @@ nios2_emit_move_sequence (rtx *operands, machine_mode mode)
}
}
else if (gprel_constant_p (from) || r0rel_constant_p (from))
- /* Handled directly by movsi_internal as gp + offset
+ /* Handled directly by movsi_internal as gp + offset
or r0 + offset. */
;
else if (nios2_large_constant_p (from))
/* This case covers either a regular symbol reference or an UNSPEC
- representing a 32-bit offset. We split the former
+ representing a 32-bit offset. We split the former
only conditionally and the latter always. */
{
- if (!nios2_large_constant_allowed ()
+ if (!nios2_large_constant_allowed ()
|| nios2_large_unspec_reloc_p (from))
{
rtx lo = nios2_split_large_constant (from, to);
@@ -2767,7 +2767,7 @@ nios2_emit_move_sequence (rtx *operands, machine_mode mode)
return true;
}
}
- else
+ else
/* This is a TLS or PIC symbol. */
{
from = nios2_legitimize_constant_address (from);
@@ -2839,7 +2839,7 @@ nios2_print_operand_punct_valid_p (unsigned char code)
z: prints the third register immediate operand in assembly
instructions. Outputs const0_rtx as the 'zero' register
instead of '0'.
-
+
y: same as 'z', but for specifically for logical instructions,
where the processing for immediates are slightly different.
@@ -3292,7 +3292,7 @@ nios2_fpu_insn_asm (enum n2fpu_code code)
static char buf[256];
const char *op1, *op2, *op3;
int ln = 256, n = 0;
-
+
int N = N2FPU_N (code);
int num_operands = N2FPU (code).num_operands;
const char *insn_name = N2FPU_NAME (code);
@@ -3384,7 +3384,7 @@ nios2_fpu_insn_asm (enum n2fpu_code code)
static rtx
nios2_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
{
- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
rtx return_rtx = NULL_RTX;
if (cum->regs_used < NUM_ARG_REGS)
@@ -3400,7 +3400,7 @@ nios2_function_arg (cumulative_args_t cum_v, const function_arg_info &arg)
static int
nios2_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
{
- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
HOST_WIDE_INT param_size = arg.promoted_size_in_bytes ();
gcc_assert (param_size >= 0);
@@ -3420,7 +3420,7 @@ static void
nios2_function_arg_advance (cumulative_args_t cum_v,
const function_arg_info &arg)
{
- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
HOST_WIDE_INT param_size = arg.promoted_size_in_bytes ();
gcc_assert (param_size >= 0);
@@ -3517,7 +3517,7 @@ nios2_setup_incoming_varargs (cumulative_args_t cum_v,
const function_arg_info &arg,
int *pretend_size, int second_time)
{
- CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
+ CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
CUMULATIVE_ARGS local_cum;
cumulative_args_t local_cum_v = pack_cumulative_args (&local_cum);
int regs_to_push;
@@ -3838,8 +3838,8 @@ nios2_expand_builtin_insn (const struct nios2_builtin_desc *d, int n,
else
{
error ("invalid argument to built-in function %s", d->name);
- return has_target_p ? gen_reg_rtx (ops[0].mode) : const0_rtx;
- }
+ return has_target_p ? gen_reg_rtx (ops[0].mode) : const0_rtx;
+ }
}
/* Expand ldio/stio and ldex/ldsex/stex/stsex form load-store
@@ -3954,7 +3954,7 @@ nios2_expand_cache_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
mem = gen_rtx_MEM (SImode, addr);
create_input_operand (&ops[0], mem, SImode);
-
+
return nios2_expand_builtin_insn (d, 1, ops, false);
}
@@ -3968,7 +3968,7 @@ nios2_expand_wrpie_builtin (tree exp, rtx target,
val = expand_normal (CALL_EXPR_ARG (exp, 0));
create_input_operand (&ops[1], val, SImode);
create_output_operand (&ops[0], target, SImode);
-
+
return nios2_expand_builtin_insn (d, 2, ops, true);
}
@@ -3982,10 +3982,10 @@ nios2_expand_eni_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
if (INTVAL (imm) != 0 && INTVAL (imm) != 1)
{
error ("the ENI instruction operand must be either 0 or 1");
- return const0_rtx;
+ return const0_rtx;
}
create_integer_operand (&ops[0], INTVAL (imm));
-
+
return nios2_expand_builtin_insn (d, 1, ops, false);
}
@@ -4307,7 +4307,7 @@ nios2_valid_target_attribute_rec (tree args)
if (ISSPACE (*t))
continue;
if (!ISDIGIT (*t))
- {
+ {
error ("%<custom-%s=%> argument should be "
"a non-negative integer", N2FPU_NAME (code));
return false;
@@ -4323,7 +4323,7 @@ nios2_valid_target_attribute_rec (tree args)
error ("%<custom-%s=%> is not recognized as FPU instruction",
argstr + 7);
return false;
- }
+ }
}
else
{
@@ -4683,7 +4683,7 @@ static bool nios2_add_insn_narrow[] = {
false, false};
/* Function to classify kinds of add instruction patterns. */
-static enum nios2_add_insn_kind
+static enum nios2_add_insn_kind
nios2_add_insn_classify (rtx_insn *insn ATTRIBUTE_UNUSED,
rtx lhs, rtx rhs1, rtx rhs2)
{
@@ -5039,7 +5039,7 @@ ldstwm_operation_p (rtx op, bool load_p)
{
int start, i, end = XVECLEN (op, 0) - 1, last_regno = -1;
unsigned int regset = 0;
- rtx base_reg, offset;
+ rtx base_reg, offset;
rtx first_elt = XVECEXP (op, 0, 0);
bool inc_p = true;
bool wb_p = base_reg_adjustment_p (first_elt, &base_reg, &offset);
@@ -5413,7 +5413,7 @@ nios2_reorg (void)
max_labelno = max_label_num ();
min_labelno = get_first_label_num ();
label_align = XCNEWVEC (unsigned char, max_labelno - min_labelno + 1);
-
+
/* Iterate on inserting alignment and adjusting branch lengths until
no more changes. */
while (changed)
@@ -5464,7 +5464,7 @@ nios2_adjust_reg_alloc_order (void)
const int cdx_reg_alloc_order[] =
{
/* Call-clobbered GPRs within CDX 3-bit encoded range. */
- 2, 3, 4, 5, 6, 7,
+ 2, 3, 4, 5, 6, 7,
/* Call-saved GPRs within CDX 3-bit encoded range. */
16, 17,
/* Other call-clobbered GPRs. */
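
Editor's note: most of the nios2.cc hunks above are whitespace cleanups, but the
nios2_valid_target_attribute_rec context shows how a "custom-<insn>=<value>"
attribute argument is validated. Below is a rough standalone sketch of that
check only, with <ctype.h> standing in for GCC's ISSPACE/ISDIGIT macros; it is
illustrative, not the real attribute parser.

#include <ctype.h>
#include <stdbool.h>
#include <stdio.h>

/* Accept the argument only if every non-space character is a digit,
   mirroring the loop shown in nios2_valid_target_attribute_rec.  */
static bool
custom_value_ok (const char *t)
{
  for (; *t; t++)
    {
      if (isspace ((unsigned char) *t))
        continue;
      if (!isdigit ((unsigned char) *t))
        return false;   /* "argument should be a non-negative integer" */
    }
  return true;
}

int main (void)
{
  printf ("\"123\": %d  \" 7 \": %d  \"-1\": %d\n",
          custom_value_ok ("123"), custom_value_ok (" 7 "),
          custom_value_ok ("-1"));
  return 0;
}
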
diff --git a/gcc/config/nios2/nios2.h b/gcc/config/nios2/nios2.h
index bad72671..88ad166 100644
--- a/gcc/config/nios2/nios2.h
+++ b/gcc/config/nios2/nios2.h
@@ -1,6 +1,6 @@
/* Definitions of target machine for Altera Nios II.
Copyright (C) 2012-2024 Free Software Foundation, Inc.
- Contributed by Jonah Graham (jgraham@altera.com),
+ Contributed by Jonah Graham (jgraham@altera.com),
Will Reece (wreece@altera.com), and Jeff DaSilva (jdasilva@altera.com).
Contributed by Mentor Graphics, Inc.
@@ -127,7 +127,7 @@
29 r29 ea Exception Return Address
30 r30 ba Breakpoint Return Address
31 r31 ra Return Address
-
+
32 ctl0 status
33 ctl1 estatus STATUS saved by exception
34 ctl2 bstatus STATUS saved by break
@@ -141,7 +141,7 @@
40 First Pseudo Register
In addition, r12 is used as the static chain register and r13, r14, and r15
- are clobbered by PLT code sequences.
+ are clobbered by PLT code sequences.
The definitions for all the hard register numbers are located in nios2.md.
*/
diff --git a/gcc/config/nvptx/gen-opt.sh b/gcc/config/nvptx/gen-opt.sh
index 3f78382..6022f51 100644
--- a/gcc/config/nvptx/gen-opt.sh
+++ b/gcc/config/nvptx/gen-opt.sh
@@ -38,12 +38,24 @@ echo
. $gen_copyright_sh opt
+# Not emitting the following here (in addition to having it in 'nvptx.opt'), as
+# we'll otherwise run into:
+#
+# gtyp-input.list:10: file [...]/gcc/config/nvptx/nvptx-opts.h specified more than once for language (all)
+# make[2]: *** [Makefile:2981: s-gtype] Error 1
+: ||
+cat <<EOF
+
+HeaderInclude
+config/nvptx/nvptx-opts.h
+EOF
+
# Separator.
echo
cat <<EOF
Enum
-Name(ptx_isa) Type(int)
+Name(ptx_isa) Type(enum ptx_isa)
Known PTX ISA target architectures (for use with the -misa= option):
EOF
diff --git a/gcc/config/nvptx/mkoffload.cc b/gcc/config/nvptx/mkoffload.cc
index 503b1ab..ddb1c66 100644
--- a/gcc/config/nvptx/mkoffload.cc
+++ b/gcc/config/nvptx/mkoffload.cc
@@ -29,6 +29,7 @@
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -61,6 +62,7 @@ static const char *omp_requires_file;
static const char *ptx_dumpbase;
enum offload_abi offload_abi = OFFLOAD_ABI_UNSET;
+const char *offload_abi_host_opts = NULL;
/* Delete tempfiles. */
@@ -607,17 +609,10 @@ compile_native (const char *infile, const char *outfile, const char *compiler,
obstack_ptr_grow (&argv_obstack, ptx_dumpbase);
obstack_ptr_grow (&argv_obstack, "-dumpbase-ext");
obstack_ptr_grow (&argv_obstack, ".c");
- switch (offload_abi)
- {
- case OFFLOAD_ABI_LP64:
- obstack_ptr_grow (&argv_obstack, "-m64");
- break;
- case OFFLOAD_ABI_ILP32:
- obstack_ptr_grow (&argv_obstack, "-m32");
- break;
- default:
- gcc_unreachable ();
- }
+ if (!offload_abi_host_opts)
+ fatal_error (input_location,
+ "%<-foffload-abi-host-opts%> not specified.");
+ obstack_ptr_grow (&argv_obstack, offload_abi_host_opts);
obstack_ptr_grow (&argv_obstack, infile);
obstack_ptr_grow (&argv_obstack, "-c");
obstack_ptr_grow (&argv_obstack, "-o");
@@ -721,6 +716,15 @@ main (int argc, char **argv)
"unrecognizable argument of option " STR);
}
#undef STR
+ else if (startswith (argv[i], "-foffload-abi-host-opts="))
+ {
+ if (offload_abi_host_opts)
+ fatal_error (input_location,
+ "%<-foffload-abi-host-opts%> specified "
+ "multiple times");
+ offload_abi_host_opts
+ = argv[i] + strlen ("-foffload-abi-host-opts=");
+ }
else if (strcmp (argv[i], "-fopenmp") == 0)
fopenmp = true;
else if (strcmp (argv[i], "-fopenacc") == 0)
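
Editor's note: the mkoffload.cc hunk replaces the hard-coded -m64/-m32 switch
with a new -foffload-abi-host-opts= option that must be passed in and may not
be repeated. A minimal standalone sketch of that argument-parsing pattern is
below; GCC's startswith and fatal_error are replaced by strncmp and plain
stderr diagnostics, so treat it as an illustration rather than the real tool.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const char *offload_abi_host_opts;   /* value of the option, if seen */

int main (int argc, char **argv)
{
  static const char prefix[] = "-foffload-abi-host-opts=";

  for (int i = 1; i < argc; i++)
    if (strncmp (argv[i], prefix, strlen (prefix)) == 0)
      {
        if (offload_abi_host_opts)
          {
            fprintf (stderr, "-foffload-abi-host-opts specified multiple times\n");
            return 1;
          }
        offload_abi_host_opts = argv[i] + strlen (prefix);
      }

  if (!offload_abi_host_opts)
    {
      fprintf (stderr, "-foffload-abi-host-opts not specified\n");
      return 1;
    }
  printf ("host ABI options: %s\n", offload_abi_host_opts);
  return 0;
}
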
diff --git a/gcc/config/nvptx/nvptx-c.cc b/gcc/config/nvptx/nvptx-c.cc
index 8538952..516ce90 100644
--- a/gcc/config/nvptx/nvptx-c.cc
+++ b/gcc/config/nvptx/nvptx-c.cc
@@ -51,10 +51,8 @@ nvptx_cpu_cpp_builtins (void)
cpp_define (parse_in, ptx_sm);
{
- unsigned major
- = ptx_version_to_number ((ptx_version)ptx_version_option, true);
- unsigned minor
- = ptx_version_to_number ((ptx_version)ptx_version_option, false);
+ unsigned major = ptx_version_to_number (ptx_version_option, true);
+ unsigned minor = ptx_version_to_number (ptx_version_option, false);
cpp_define_formatted (parse_in, "__PTX_ISA_VERSION_MAJOR__=%u", major);
cpp_define_formatted (parse_in, "__PTX_ISA_VERSION_MINOR__=%u", minor);
}
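
Editor's note: the nvptx-c.cc hunk only drops casts now that ptx_version_option
carries the enum type, but it also shows which macros nvptx_cpu_cpp_builtins
defines. A small usage example of those predefines follows; it compiles on any
target, and which branch is taken simply depends on whether the compiler is an
nvptx one.

#include <stdio.h>

int main (void)
{
#if defined(__PTX_ISA_VERSION_MAJOR__) && defined(__PTX_ISA_VERSION_MINOR__)
  /* Defined by the nvptx front of the compiler, per the hunk above.  */
  printf ("PTX ISA version %d.%d\n",
          __PTX_ISA_VERSION_MAJOR__, __PTX_ISA_VERSION_MINOR__);
#else
  puts ("not compiling for nvptx");
#endif
  return 0;
}
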
diff --git a/gcc/config/nvptx/nvptx-gen.opt b/gcc/config/nvptx/nvptx-gen.opt
index b097caf..84b70d6 100644
--- a/gcc/config/nvptx/nvptx-gen.opt
+++ b/gcc/config/nvptx/nvptx-gen.opt
@@ -20,7 +20,7 @@
; <http://www.gnu.org/licenses/>.
Enum
-Name(ptx_isa) Type(int)
+Name(ptx_isa) Type(enum ptx_isa)
Known PTX ISA target architectures (for use with the -misa= option):
EnumValue
diff --git a/gcc/config/nvptx/nvptx-opts.h b/gcc/config/nvptx/nvptx-opts.h
index f897532..fb5147c 100644
--- a/gcc/config/nvptx/nvptx-opts.h
+++ b/gcc/config/nvptx/nvptx-opts.h
@@ -22,6 +22,7 @@
enum ptx_isa
{
+ PTX_ISA_unset,
#define NVPTX_SM(XX, SEP) PTX_ISA_SM ## XX SEP
#define NVPTX_SM_SEP ,
#include "nvptx-sm.def"
@@ -31,7 +32,8 @@ enum ptx_isa
enum ptx_version
{
- PTX_VERSION_default,
+ PTX_VERSION_unset,
+ PTX_VERSION_default = PTX_VERSION_unset,
PTX_VERSION_3_0,
PTX_VERSION_3_1,
PTX_VERSION_4_2,
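
Editor's note: the new PTX_ISA_unset and PTX_VERSION_unset enumerators, paired
with the Init(...) clauses added to nvptx.opt later in this diff, give the
option variables a distinguishable "never set by the user" state. A toy
restatement of that sentinel idea is below; the names are illustrative and do
not use the real option machinery.

#include <stdio.h>

enum isa_choice { ISA_UNSET, ISA_SM_30, ISA_SM_53, ISA_SM_80 };

/* Like Var(ptx_isa_option) Init(PTX_ISA_unset) in the .opt file.  */
static enum isa_choice isa_option = ISA_UNSET;

int main (void)
{
  if (isa_option == ISA_UNSET)
    puts ("-march= must be specified");   /* mirrors nvptx_option_override */
  else
    printf ("ISA selected: %d\n", (int) isa_option);
  return 0;
}
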
diff --git a/gcc/config/nvptx/nvptx.cc b/gcc/config/nvptx/nvptx.cc
index 2a8f713..3072d37 100644
--- a/gcc/config/nvptx/nvptx.cc
+++ b/gcc/config/nvptx/nvptx.cc
@@ -20,6 +20,7 @@
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include <sstream>
#include "system.h"
@@ -231,8 +232,7 @@ first_ptx_version_supporting_sm (enum ptx_isa sm)
static enum ptx_version
default_ptx_version_option (void)
{
- enum ptx_version first
- = first_ptx_version_supporting_sm ((enum ptx_isa) ptx_isa_option);
+ enum ptx_version first = first_ptx_version_supporting_sm (ptx_isa_option);
/* Pick a version that supports the sm. */
enum ptx_version res = first;
@@ -311,20 +311,21 @@ sm_version_to_string (enum ptx_isa sm)
static void
handle_ptx_version_option (void)
{
- if (!OPTION_SET_P (ptx_version_option)
- || ptx_version_option == PTX_VERSION_default)
+ if (!OPTION_SET_P (ptx_version_option))
+ gcc_checking_assert (ptx_version_option == PTX_VERSION_default);
+
+ if (ptx_version_option == PTX_VERSION_default)
{
ptx_version_option = default_ptx_version_option ();
return;
}
- enum ptx_version first
- = first_ptx_version_supporting_sm ((enum ptx_isa) ptx_isa_option);
+ enum ptx_version first = first_ptx_version_supporting_sm (ptx_isa_option);
if (ptx_version_option < first)
error ("PTX version (%<-mptx%>) needs to be at least %s to support selected"
" %<-misa%> (sm_%s)", ptx_version_to_string (first),
- sm_version_to_string ((enum ptx_isa)ptx_isa_option));
+ sm_version_to_string (ptx_isa_option));
}
/* Implement TARGET_OPTION_OVERRIDE. */
@@ -336,7 +337,9 @@ nvptx_option_override (void)
/* Via nvptx 'OPTION_DEFAULT_SPECS', '-misa' always appears on the command
line; but handle the case that the compiler is not run via the driver. */
- if (!OPTION_SET_P (ptx_isa_option))
+ gcc_checking_assert ((ptx_isa_option == PTX_ISA_unset)
+ == (!OPTION_SET_P (ptx_isa_option)));
+ if (ptx_isa_option == PTX_ISA_unset)
fatal_error (UNKNOWN_LOCATION, "%<-march=%> must be specified");
handle_ptx_version_option ();
@@ -594,7 +597,7 @@ nvptx_emit_forking (unsigned mask, bool is_call)
if (mask)
{
rtx op = GEN_INT (mask | (is_call << GOMP_DIM_MAX));
-
+
/* Emit fork at all levels. This helps form SESE regions, as
it creates a block with a single successor before entering a
partitioned region. That is a good candidate for the end of
@@ -902,10 +905,10 @@ write_return_mode (std::stringstream &s, bool for_proto, machine_mode mode)
const char *ptx_type = nvptx_ptx_type_from_mode (mode, false);
const char *pfx = "\t.reg";
const char *sfx = ";\n";
-
+
if (for_proto)
pfx = "(.param", sfx = "_out) ";
-
+
s << pfx << ptx_type << " " << reg_names[NVPTX_RETURN_REGNUM] << sfx;
}
@@ -928,7 +931,7 @@ write_return_type (std::stringstream &s, bool for_proto, tree type)
{
if (for_proto)
return return_in_mem;
-
+
/* Named return values can cause us to return a pointer as well
as expect an argument for the return location. This is
optimization-level specific, so no caller can make use of
@@ -995,8 +998,7 @@ static void
write_fn_proto_1 (std::stringstream &s, bool is_defn,
const char *name, const_tree decl, bool force_public)
{
- if (lookup_attribute ("alias", DECL_ATTRIBUTES (decl)) == NULL)
- write_fn_marker (s, is_defn, TREE_PUBLIC (decl) || force_public, name);
+ write_fn_marker (s, is_defn, TREE_PUBLIC (decl) || force_public, name);
/* PTX declaration. */
if (DECL_EXTERNAL (decl))
@@ -1055,7 +1057,7 @@ write_fn_proto_1 (std::stringstream &s, bool is_defn,
for (; args; args = TREE_CHAIN (args), not_atomic_weak_arg--)
{
tree type = prototyped ? TREE_VALUE (args) : TREE_TYPE (args);
-
+
if (not_atomic_weak_arg)
argno = write_arg_type (s, -1, argno, type, prototyped);
else
@@ -1225,7 +1227,7 @@ static void
nvptx_maybe_record_fnsym (rtx sym)
{
tree decl = SYMBOL_REF_DECL (sym);
-
+
if (decl && TREE_CODE (decl) == FUNCTION_DECL && DECL_EXTERNAL (decl))
nvptx_record_needed_fndecl (decl);
}
@@ -1509,7 +1511,7 @@ nvptx_declare_function_name (FILE *file, const char *name, const_tree decl)
bool return_in_mem = write_return_type (s, false, result_type);
if (return_in_mem)
argno = write_arg_type (s, 0, argno, ptr_type_node, true);
-
+
/* Declare and initialize incoming arguments. */
tree args = TYPE_ARG_TYPES (fntype);
bool prototyped = true;
@@ -1900,7 +1902,7 @@ nvptx_expand_call (rtx retval, rtx address)
if (varargs)
XVECEXP (pat, 0, vec_pos++) = gen_rtx_USE (VOIDmode, varargs);
- gcc_assert (vec_pos = XVECLEN (pat, 0));
+ gcc_assert (vec_pos == XVECLEN (pat, 0));
nvptx_emit_forking (parallel, true);
emit_call_insn (pat);
@@ -1944,7 +1946,7 @@ static rtx
nvptx_gen_unpack (rtx dst0, rtx dst1, rtx src)
{
rtx res;
-
+
switch (GET_MODE (src))
{
case E_DImode:
@@ -1965,7 +1967,7 @@ static rtx
nvptx_gen_pack (rtx dst, rtx src0, rtx src1)
{
rtx res;
-
+
switch (GET_MODE (dst))
{
case E_DImode:
@@ -2068,7 +2070,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
case E_BImode:
{
rtx tmp = gen_reg_rtx (SImode);
-
+
start_sequence ();
emit_insn (gen_sel_truesi (tmp, src, GEN_INT (1), const0_rtx));
emit_insn (nvptx_gen_shuffle (tmp, tmp, idx, kind));
@@ -2091,7 +2093,7 @@ nvptx_gen_shuffle (rtx dst, rtx src, rtx idx, nvptx_shuffle_kind kind)
end_sequence ();
}
break;
-
+
default:
gcc_unreachable ();
}
@@ -2131,7 +2133,7 @@ enum propagate_mask
/* Generate instruction(s) to spill or fill register REG to/from the
worker broadcast array. PM indicates what is to be done, REP
how many loop iterations will be executed (0 for not a loop). */
-
+
static rtx
nvptx_gen_shared_bcast (rtx reg, propagate_mask pm, unsigned rep,
broadcast_data_t *data, bool vector)
@@ -2144,7 +2146,7 @@ nvptx_gen_shared_bcast (rtx reg, propagate_mask pm, unsigned rep,
case E_BImode:
{
rtx tmp = gen_reg_rtx (SImode);
-
+
start_sequence ();
if (pm & PM_read)
emit_insn (gen_sel_truesi (tmp, reg, GEN_INT (1), const0_rtx));
@@ -2171,7 +2173,7 @@ nvptx_gen_shared_bcast (rtx reg, propagate_mask pm, unsigned rep,
if (data->offset)
addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (data->offset));
}
-
+
addr = gen_rtx_MEM (mode, addr);
if (pm == PM_read)
res = gen_rtx_SET (addr, reg);
@@ -2184,7 +2186,7 @@ nvptx_gen_shared_bcast (rtx reg, propagate_mask pm, unsigned rep,
{
/* We're using a ptr, increment it. */
start_sequence ();
-
+
emit_insn (res);
emit_insn (gen_adddi3 (data->ptr, data->ptr,
GEN_INT (GET_MODE_SIZE (GET_MODE (reg)))));
@@ -2257,7 +2259,7 @@ output_init_frag (rtx sym)
init_frag.val = 0;
init_frag.offset = 0;
init_frag.remaining--;
-
+
if (sym)
{
bool function = (SYMBOL_REF_DECL (sym)
@@ -2738,7 +2740,7 @@ nvptx_output_call_insn (rtx_insn *insn, rtx result, rtx callee)
fprintf (asm_out_file, "\t\tcall ");
if (result != NULL_RTX)
fprintf (asm_out_file, "(%s_in), ", reg_names[NVPTX_RETURN_REGNUM]);
-
+
if (decl)
{
char *replaced_dots = NULL;
@@ -3000,7 +3002,7 @@ nvptx_print_operand (FILE *file, rtx x, int code)
{
nvptx_shuffle_kind kind = (nvptx_shuffle_kind) UINTVAL (x);
/* Same order as nvptx_shuffle_kind. */
- static const char *const kinds[] =
+ static const char *const kinds[] =
{".up", ".down", ".bfly", ".idx"};
fputs (kinds[kind], file);
}
@@ -3495,7 +3497,7 @@ struct parallel
{
/* Parent parallel. */
parallel *parent;
-
+
/* Next sibling parallel. */
parallel *next;
@@ -3539,7 +3541,7 @@ parallel::parallel (parallel *parent_, unsigned mask_)
forked_block = join_block = 0;
forked_insn = join_insn = 0;
fork_insn = joining_insn = 0;
-
+
if (parent)
{
next = parent->inner;
@@ -3627,7 +3629,7 @@ nvptx_split_blocks (bb_insn_map_t *map)
block = elt->second;
remap = block;
}
-
+
/* Split block before insn. The insn is in the new block */
edge e = split_block (block, PREV_INSN (elt->first));
@@ -3799,7 +3801,7 @@ nvptx_discover_pars (bb_insn_map_t *map)
nvptx_dump_pars (par, 0);
fprintf (dump_file, "\n");
}
-
+
return par;
}
@@ -3830,7 +3832,7 @@ nvptx_discover_pars (bb_insn_map_t *map)
the node itself and one for the output edges. Such back edges are
referred to as 'Brackets'. Cycle equivalent nodes will have the
same set of brackets.
-
+
Determining bracket equivalency is done by maintaining a list of
brackets in such a manner that the list length and final bracket
uniquely identify the set.
@@ -3840,7 +3842,7 @@ nvptx_discover_pars (bb_insn_map_t *map)
algorithm. Notice it doesn't actually find the set of nodes within
a particular region, just unordered sets of nodes that are the
entries and exits of SESE regions.
-
+
After determining cycle equivalency, we need to find the minimal
set of SESE regions. Do this with a DFS coloring walk of the
complete graph. We're either 'looking' or 'coloring'. When
@@ -3931,7 +3933,7 @@ struct bb_sese
back.first ? back.first->index : 0, back.second);
brackets.safe_push (bracket (back));
}
-
+
void append (bb_sese *child);
void remove (const pseudo_node_t &);
@@ -4019,10 +4021,10 @@ nvptx_sese_number (int n, int p, int dir, basic_block b,
if (dump_file)
fprintf (dump_file, "Block %d(%d), parent (%d), orientation %+d\n",
b->index, n, p, dir);
-
+
BB_SET_SESE (b, new bb_sese (n, p, dir));
p = n;
-
+
n += 3;
list->quick_push (b);
@@ -4039,7 +4041,7 @@ nvptx_sese_number (int n, int p, int dir, basic_block b,
FOR_EACH_EDGE (e, ei, edges)
{
basic_block target = *(basic_block *)((char *)e + offset);
-
+
if (target->flags & BB_VISITED)
n = nvptx_sese_number (n, p, dir, target, list);
}
@@ -4117,7 +4119,7 @@ nvptx_sese_pseudo (basic_block me, bb_sese *sese, int depth, int dir,
/* Non-parental ancestor node -- a backlink. */
int d = usd * t_sese->dir;
int back = t_sese->node + d;
-
+
if (hi_back > back)
{
hi_back = back;
@@ -4152,7 +4154,7 @@ nvptx_sese_pseudo (basic_block me, bb_sese *sese, int depth, int dir,
sese->push (pseudo_node_t (nullptr, 0));
}
}
-
+
/* If this node leads directly or indirectly to a no-return region of
the graph, then fake a backedge to entry node. */
if (!sese->brackets.length () || !edges || !edges->length ())
@@ -4209,7 +4211,7 @@ nvptx_sese_pseudo (basic_block me, bb_sese *sese, int depth, int dir,
node_child = t_sese->high;
}
}
-
+
sese->push (node_child);
}
}
@@ -4232,7 +4234,7 @@ nvptx_sese_color (auto_vec<unsigned> &color_counts, bb_pair_vec_t &regions,
gcc_assert (coloring < 0 || (sese && coloring == sese->color));
return;
}
-
+
block->flags |= BB_VISITED;
if (sese)
@@ -4264,7 +4266,7 @@ nvptx_sese_color (auto_vec<unsigned> &color_counts, bb_pair_vec_t &regions,
{
edge e;
edge_iterator ei;
-
+
FOR_EACH_EDGE (e, ei, block->succs)
nvptx_sese_color (color_counts, regions, e->dest, coloring);
}
@@ -4281,7 +4283,7 @@ nvptx_find_sese (auto_vec<basic_block> &blocks, bb_pair_vec_t &regions)
basic_block block;
int ix;
- /* First clear each BB of the whole function. */
+ /* First clear each BB of the whole function. */
FOR_ALL_BB_FN (block, cfun)
{
block->flags &= ~BB_VISITED;
@@ -4312,7 +4314,7 @@ nvptx_find_sese (auto_vec<basic_block> &blocks, bb_pair_vec_t &regions)
if (dump_file)
fprintf (dump_file, "Searching graph starting at %d\n", block->index);
-
+
/* Number the nodes reachable from block initial DFS order. */
int depth = nvptx_sese_number (2, 0, +1, block, &spanlist);
@@ -4342,7 +4344,7 @@ nvptx_find_sese (auto_vec<basic_block> &blocks, bb_pair_vec_t &regions)
{
unsigned count;
const char *comma = "";
-
+
fprintf (dump_file, "Found %d cycle equivalents\n",
color_counts.length ());
for (ix = 0; color_counts.iterate (ix, &count); ix++)
@@ -4362,7 +4364,7 @@ nvptx_find_sese (auto_vec<basic_block> &blocks, bb_pair_vec_t &regions)
}
fprintf (dump_file, "\n");
}
-
+
/* Now we've colored every block in the subgraph. We now need to
determine the minimal set of SESE regions that cover that
subgraph. Do this with a DFS walk of the complete function.
@@ -4384,7 +4386,7 @@ nvptx_find_sese (auto_vec<basic_block> &blocks, bb_pair_vec_t &regions)
{
const char *comma = "";
int len = regions.length ();
-
+
fprintf (dump_file, "SESE regions:");
for (ix = 0; ix != len; ix++)
{
@@ -4414,7 +4416,7 @@ nvptx_find_sese (auto_vec<basic_block> &blocks, bb_pair_vec_t &regions)
}
fprintf (dump_file, "\n\n");
}
-
+
for (ix = 0; blocks.iterate (ix, &block); ix++)
delete BB_GET_SESE (block);
}
@@ -4476,7 +4478,7 @@ nvptx_propagate (bool is_call, basic_block block, rtx_insn *insn,
idx = gen_reg_rtx (SImode);
pred = gen_reg_rtx (BImode);
label = gen_label_rtx ();
-
+
emit_insn (gen_rtx_SET (idx, GEN_INT (fs)));
/* Allow worker function to initialize anything needed. */
rtx init = fn (tmp, PM_loop_begin, fs, data, vector);
@@ -4534,7 +4536,7 @@ warp_prop_gen (rtx reg, propagate_mask pm,
{
if (!(pm & PM_read_write))
return 0;
-
+
return nvptx_gen_warp_bcast (reg);
}
@@ -4796,7 +4798,7 @@ verify_neutering_labels (basic_block to, rtx_insn *vector_label,
/* Single neutering according to MASK. FROM is the incoming block and
TO is the outgoing block. These may be the same block. Insert at
start of FROM:
-
+
if (tid.<axis>) goto end.
and insert before ending branch of TO (if there is such an insn):
@@ -5165,7 +5167,7 @@ nvptx_process_pars (parallel *par)
{
if (nvptx_optimize)
nvptx_optimize_inner (par);
-
+
unsigned inner_mask = par->mask;
/* Do the inner parallels first. */
@@ -5231,7 +5233,7 @@ nvptx_neuter_pars (parallel *par, unsigned modes, unsigned outer)
& (GOMP_DIM_MASK (GOMP_DIM_WORKER)
| GOMP_DIM_MASK (GOMP_DIM_VECTOR)));
unsigned skip_mask = 0, neuter_mask = 0;
-
+
if (par->inner)
nvptx_neuter_pars (par->inner, modes, outer | me);
@@ -5292,7 +5294,7 @@ nvptx_neuter_pars (parallel *par, unsigned modes, unsigned outer)
if (skip_mask)
nvptx_skip_par (skip_mask, par);
-
+
if (par->next)
nvptx_neuter_pars (par->next, modes, outer);
}
@@ -5735,7 +5737,7 @@ nvptx_reorg (void)
if (dump_file)
df_dump (dump_file);
-
+
/* Mark unused regs as unused. */
int max_regs = max_reg_num ();
for (int i = LAST_VIRTUAL_REGISTER + 1; i < max_regs; i++)
@@ -5953,13 +5955,11 @@ nvptx_file_start (void)
fputs ("// BEGIN PREAMBLE\n", asm_out_file);
fputs ("\t.version\t", asm_out_file);
- fputs (ptx_version_to_string ((enum ptx_version)ptx_version_option),
- asm_out_file);
+ fputs (ptx_version_to_string (ptx_version_option), asm_out_file);
fputs ("\n", asm_out_file);
fputs ("\t.target\tsm_", asm_out_file);
- fputs (sm_version_to_string ((enum ptx_isa)ptx_isa_option),
- asm_out_file);
+ fputs (sm_version_to_string (ptx_isa_option), asm_out_file);
fputs ("\n", asm_out_file);
fprintf (asm_out_file, "\t.address_size %d\n", GET_MODE_BITSIZE (Pmode));
@@ -6031,7 +6031,7 @@ nvptx_expand_shuffle (tree exp, rtx target, machine_mode mode, int ignore)
{
if (ignore)
return target;
-
+
rtx src = expand_expr (CALL_EXPR_ARG (exp, 0),
NULL_RTX, mode, EXPAND_NORMAL);
if (!REG_P (src))
@@ -6041,7 +6041,7 @@ nvptx_expand_shuffle (tree exp, rtx target, machine_mode mode, int ignore)
NULL_RTX, SImode, EXPAND_NORMAL);
rtx op = expand_expr (CALL_EXPR_ARG (exp, 2),
NULL_RTX, SImode, EXPAND_NORMAL);
-
+
if (!REG_P (idx) && GET_CODE (idx) != CONST_INT)
idx = copy_to_mode_reg (SImode, idx);
@@ -6060,7 +6060,7 @@ nvptx_expand_brev (tree exp, rtx target, machine_mode mode, int ignore)
{
if (ignore)
return target;
-
+
rtx arg = expand_expr (CALL_EXPR_ARG (exp, 0),
NULL_RTX, mode, EXPAND_NORMAL);
if (!REG_P (arg))
@@ -6150,7 +6150,7 @@ nvptx_expand_cmp_swap (tree exp, rtx target,
machine_mode ARG_UNUSED (m), int ARG_UNUSED (ignore))
{
machine_mode mode = TYPE_MODE (TREE_TYPE (exp));
-
+
if (!target)
target = gen_reg_rtx (mode);
@@ -6167,7 +6167,7 @@ nvptx_expand_cmp_swap (tree exp, rtx target,
cmp = copy_to_mode_reg (mode, cmp);
if (!REG_P (src))
src = copy_to_mode_reg (mode, src);
-
+
if (mode == SImode)
pat = gen_atomic_compare_and_swapsi_1 (target, mem, cmp, src, const0_rtx);
else
@@ -6747,7 +6747,7 @@ nvptx_generate_vector_shuffle (location_t loc,
fn = NVPTX_BUILTIN_SHUFFLELL;
arg_type = long_long_unsigned_type_node;
}
-
+
tree call = nvptx_builtin_decl (fn, true);
tree bits = build_int_cst (unsigned_type_node, shift);
tree kind = build_int_cst (unsigned_type_node, SHUFFLE_DOWN);
@@ -6784,7 +6784,7 @@ static tree
nvptx_global_lock_addr ()
{
tree v = global_lock_var;
-
+
if (!v)
{
tree name = get_identifier ("__reduction_lock");
@@ -6847,7 +6847,7 @@ nvptx_lockless_update (location_t loc, gimple_stmt_iterator *gsi,
gimple *init_end = gimple_seq_last (init_seq);
gsi_insert_seq_before (gsi, init_seq, GSI_SAME_STMT);
-
+
/* Split the block just after the init stmts. */
basic_block pre_bb = gsi_bb (*gsi);
edge pre_edge = split_block (pre_bb, init_end);
@@ -6859,7 +6859,7 @@ nvptx_lockless_update (location_t loc, gimple_stmt_iterator *gsi,
tree expect_var = make_ssa_name (arg_type);
tree actual_var = make_ssa_name (arg_type);
tree write_var = make_ssa_name (arg_type);
-
+
/* Build and insert the reduction calculation. */
gimple_seq red_seq = NULL;
tree write_expr = fold_build1 (code, var_type, expect_var);
@@ -6961,7 +6961,7 @@ nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi,
basic_block update_bb = locked_edge->dest;
lock_bb = locked_edge->src;
*gsi = gsi_for_stmt (gsi_stmt (*gsi));
-
+
/* Create the lock loop ... */
locked_edge->flags ^= EDGE_TRUE_VALUE | EDGE_FALLTHRU;
locked_edge->probability = profile_probability::even ();
@@ -6993,11 +6993,11 @@ nvptx_lockfull_update (location_t loc, gimple_stmt_iterator *gsi,
tree ref_in = build_simple_mem_ref (ptr);
TREE_THIS_VOLATILE (ref_in) = 1;
gimplify_assign (acc_in, ref_in, &red_seq);
-
+
tree acc_out = make_ssa_name (var_type);
tree update_expr = fold_build2 (op, var_type, ref_in, var);
gimplify_assign (acc_out, update_expr, &red_seq);
-
+
tree ref_out = build_simple_mem_ref (ptr);
TREE_THIS_VOLATILE (ref_out) = 1;
gimplify_assign (ref_out, acc_out, &red_seq);
@@ -7060,7 +7060,7 @@ nvptx_goacc_reduction_setup (gcall *call, offload_attrs *oa)
if (!integer_zerop (ref_to_res))
var = build_simple_mem_ref (ref_to_res);
}
-
+
if (level == GOMP_DIM_WORKER
|| (level == GOMP_DIM_VECTOR && oa->vector_length > PTX_WARP_SIZE))
{
@@ -7097,7 +7097,7 @@ nvptx_goacc_reduction_init (gcall *call, offload_attrs *oa)
tree init = omp_reduction_init_op (gimple_location (call), rcode,
TREE_TYPE (var));
gimple_seq seq = NULL;
-
+
push_gimplify_context (true);
if (level == GOMP_DIM_VECTOR && oa->vector_length == PTX_WARP_SIZE)
@@ -7122,7 +7122,7 @@ nvptx_goacc_reduction_init (gcall *call, offload_attrs *oa)
/* Fixup flags from call_bb to init_bb. */
init_edge->flags ^= EDGE_FALLTHRU | EDGE_TRUE_VALUE;
init_edge->probability = profile_probability::even ();
-
+
/* Set the initialization stmts. */
gimple_seq init_seq = NULL;
tree init_var = make_ssa_name (TREE_TYPE (var));
@@ -7134,7 +7134,7 @@ nvptx_goacc_reduction_init (gcall *call, offload_attrs *oa)
gsi_prev (&gsi);
edge inited_edge = split_block (gsi_bb (gsi), gsi_stmt (gsi));
basic_block dst_bb = inited_edge->dest;
-
+
/* Create false edge from call_bb to dst_bb. */
edge nop_edge = make_edge (call_bb, dst_bb, EDGE_FALSE_VALUE);
nop_edge->probability = profile_probability::even ();
@@ -7249,7 +7249,7 @@ nvptx_goacc_reduction_teardown (gcall *call, offload_attrs *oa)
tree var = gimple_call_arg (call, 2);
int level = TREE_INT_CST_LOW (gimple_call_arg (call, 3));
gimple_seq seq = NULL;
-
+
push_gimplify_context (true);
if (level == GOMP_DIM_WORKER
|| (level == GOMP_DIM_VECTOR && oa->vector_length > PTX_WARP_SIZE))
@@ -7276,7 +7276,7 @@ nvptx_goacc_reduction_teardown (gcall *call, offload_attrs *oa)
if (lhs)
gimplify_assign (lhs, var, &seq);
-
+
pop_gimplify_context (NULL);
gsi_replace_with_seq (&gsi, seq, true);
@@ -7583,7 +7583,8 @@ nvptx_mem_local_p (rtx mem)
while (0)
void
-nvptx_asm_output_def_from_decls (FILE *stream, tree name, tree value)
+nvptx_asm_output_def_from_decls (FILE *stream, tree name,
+ tree value ATTRIBUTE_UNUSED)
{
if (nvptx_alias == 0 || !TARGET_PTX_6_3)
{
@@ -7618,7 +7619,8 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name, tree value)
return;
}
- if (!cgraph_node::get (name)->referred_to_p ())
+ cgraph_node *cnode = cgraph_node::get (name);
+ if (!cnode->referred_to_p ())
/* Prevent "Internal error: reference to deleted section". */
return;
@@ -7627,8 +7629,27 @@ nvptx_asm_output_def_from_decls (FILE *stream, tree name, tree value)
fputs (s.str ().c_str (), stream);
tree id = DECL_ASSEMBLER_NAME (name);
+
+ /* Walk alias chain to get reference callgraph node.
+ The rationale of using ultimate_alias_target here is that
+ PTX's .alias directive only supports 1-level aliasing where
+ aliasee is function defined in same module.
+
+ So for the following case:
+ int foo() { return 42; }
+ int bar () __attribute__((alias ("foo")));
+ int baz () __attribute__((alias ("bar")));
+
+ should resolve baz to foo:
+ .visible .func (.param.u32 %value_out) baz;
+ .alias baz,foo; */
+ symtab_node *alias_target_node = cnode->ultimate_alias_target ();
+ tree alias_target_id = DECL_ASSEMBLER_NAME (alias_target_node->decl);
+ std::stringstream s_def;
+ write_fn_marker (s_def, true, TREE_PUBLIC (name), IDENTIFIER_POINTER (id));
+ fputs (s_def.str ().c_str (), stream);
NVPTX_ASM_OUTPUT_DEF (stream, IDENTIFIER_POINTER (id),
- IDENTIFIER_POINTER (value));
+ IDENTIFIER_POINTER (alias_target_id));
}
#undef NVPTX_ASM_OUTPUT_DEF
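
Editor's note: the alias-chain comment added in the hunk above can be exercised
with an ordinary translation unit. This is the same foo/bar/baz example, made
compilable on targets that support the GNU alias attribute; the PTX output it
should produce (for instance ".alias baz,foo;") is taken from the comment in
the hunk and not verified here.

int foo (void) { return 42; }
int bar (void) __attribute__ ((alias ("foo")));
int baz (void) __attribute__ ((alias ("bar")));

int main (void)
{
  return baz () - 42;   /* 0 if the chain resolves to foo's definition */
}
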
diff --git a/gcc/config/nvptx/nvptx.opt b/gcc/config/nvptx/nvptx.opt
index deb0066..c040740 100644
--- a/gcc/config/nvptx/nvptx.opt
+++ b/gcc/config/nvptx/nvptx.opt
@@ -17,6 +17,9 @@
; along with GCC; see the file COPYING3. If not see
; <http://www.gnu.org/licenses/>.
+HeaderInclude
+config/nvptx/nvptx-opts.h
+
; It's not clear whether this was ever build/tested/used, so this is no longer
; exposed to the user.
;m32
@@ -53,7 +56,7 @@ Target Mask(GOMP)
Generate code for OpenMP offloading: enables -msoft-stack and -muniform-simt.
misa=
-Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option)
+Target RejectNegative ToLower Joined Enum(ptx_isa) Var(ptx_isa_option) Init(PTX_ISA_unset)
Specify the PTX ISA target architecture to use.
march=
@@ -118,7 +121,7 @@ march-map=sm_90a
Target RejectNegative Alias(misa=,sm_80)
Enum
-Name(ptx_version) Type(int)
+Name(ptx_version) Type(enum ptx_version)
Known PTX ISA versions (for use with the -mptx= option):
EnumValue
@@ -137,7 +140,7 @@ EnumValue
Enum(ptx_version) String(_) Value(PTX_VERSION_default)
mptx=
-Target RejectNegative ToLower Joined Enum(ptx_version) Var(ptx_version_option)
+Target RejectNegative ToLower Joined Enum(ptx_version) Var(ptx_version_option) Init(PTX_VERSION_unset)
Specify the PTX ISA version to use.
minit-regs=
diff --git a/gcc/config/openbsd-stdint.h b/gcc/config/openbsd-stdint.h
index a6da1da..00ca36c 100644
--- a/gcc/config/openbsd-stdint.h
+++ b/gcc/config/openbsd-stdint.h
@@ -1,5 +1,5 @@
#define SIG_ATOMIC_TYPE "int"
-
+
#define INT8_TYPE "signed char"
#define INT16_TYPE "short int"
#define INT32_TYPE "int"
@@ -8,7 +8,7 @@
#define UINT16_TYPE "short unsigned int"
#define UINT32_TYPE "unsigned int"
#define UINT64_TYPE "long long unsigned int"
-
+
#define INT_LEAST8_TYPE "signed char"
#define INT_LEAST16_TYPE "short int"
#define INT_LEAST32_TYPE "int"
@@ -17,7 +17,7 @@
#define UINT_LEAST16_TYPE "short unsigned int"
#define UINT_LEAST32_TYPE "unsigned int"
#define UINT_LEAST64_TYPE "long long unsigned int"
-
+
#define INT_FAST8_TYPE "int"
#define INT_FAST16_TYPE "int"
#define INT_FAST32_TYPE "int"
@@ -29,6 +29,6 @@
#define INTMAX_TYPE "long long int"
#define UINTMAX_TYPE "long long unsigned int"
-
+
#define INTPTR_TYPE "long int"
#define UINTPTR_TYPE "long unsigned int"
diff --git a/gcc/config/openbsd.h b/gcc/config/openbsd.h
index 3493df9..6522527 100644
--- a/gcc/config/openbsd.h
+++ b/gcc/config/openbsd.h
@@ -17,27 +17,27 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/* Common OpenBSD configuration.
+/* Common OpenBSD configuration.
All OpenBSD architectures include this file, which is intended as
- a repository for common defines.
+ a repository for common defines.
Some defines are common to all architectures, a few of them are
triggered by OBSD_* guards, so that we won't override architecture
defaults by mistakes.
- OBSD_HAS_CORRECT_SPECS:
+ OBSD_HAS_CORRECT_SPECS:
another mechanism provides correct specs already.
- OBSD_NO_DYNAMIC_LIBRARIES:
+ OBSD_NO_DYNAMIC_LIBRARIES:
no implementation of dynamic libraries.
- OBSD_OLD_GAS:
+ OBSD_OLD_GAS:
older flavor of gas which needs help for PIC.
OBSD_HAS_DECLARE_FUNCTION_NAME, OBSD_HAS_DECLARE_FUNCTION_SIZE,
- OBSD_HAS_DECLARE_OBJECT:
+ OBSD_HAS_DECLARE_OBJECT:
PIC support, FUNCTION_NAME/FUNCTION_SIZE are independent, whereas
the corresponding logic for OBJECTS is necessarily coupled.
There are also a few `default' defines such as ASM_WEAKEN_LABEL,
- intended as common ground for arch that don't provide
+ intended as common ground for arch that don't provide
anything suitable. */
/* OPENBSD_NATIVE is defined only when gcc is configured as part of
@@ -104,7 +104,7 @@ while (0)
/* CPP_SPEC appropriate for OpenBSD. We deal with -posix and -pthread.
XXX the way threads are handled currently is not very satisfying,
- since all code must be compiled with -pthread to work.
+ since all code must be compiled with -pthread to work.
This two-stage defines makes it easy to pick that for targets that
have subspecs. */
#ifdef CPP_CPU_SPEC
@@ -122,8 +122,8 @@ while (0)
#define CPP_SPEC OBSD_CPP_SPEC
#ifdef OBSD_OLD_GAS
-/* ASM_SPEC appropriate for OpenBSD. For some architectures, OpenBSD
- still uses a special flavor of gas that needs to be told when generating
+/* ASM_SPEC appropriate for OpenBSD. For some architectures, OpenBSD
+ still uses a special flavor of gas that needs to be told when generating
pic code. */
#undef ASM_SPEC
#define ASM_SPEC "%{" FPIE1_OR_FPIC1_SPEC ":-k} %{" FPIE2_OR_FPIC2_SPEC ":-k -K}"
@@ -152,7 +152,7 @@ while (0)
/* - we use . - _func instead of a local label,
- - we put extra spaces in expressions such as
+ - we put extra spaces in expressions such as
.type _func , @function
This is more readable for a human being and confuses c++filt less. */
@@ -161,11 +161,11 @@ while (0)
/* Define the strings used for the .type and .size directives.
These strings generally do not vary from one system running OpenBSD
to another, but if a given system needs to use different pseudo-op
- names for these, they may be overridden in the arch specific file. */
+ names for these, they may be overridden in the arch specific file. */
/* OpenBSD assembler is hacked to have .type & .size support even in a.out
- format object files. Functions size are supported but not activated
- yet (look for GRACE_PERIOD_EXPIRED in gas/config/obj-aout.c).
+ format object files. Functions size are supported but not activated
+ yet (look for GRACE_PERIOD_EXPIRED in gas/config/obj-aout.c).
SET_ASM_OP is needed for attribute alias to work. */
#undef TYPE_ASM_OP
@@ -191,12 +191,12 @@ while (0)
/* These macros generate the special .type and .size directives which
are used to set the corresponding fields of the linker symbol table
- entries under OpenBSD. These macros also have to output the starting
+ entries under OpenBSD. These macros also have to output the starting
labels for the relevant functions/objects. */
#ifndef OBSD_HAS_DECLARE_FUNCTION_NAME
/* Extra assembler code needed to declare a function properly.
- Some assemblers may also need to also have something extra said
+ Some assemblers may also need to also have something extra said
about the function's return value. We allow for that here. */
#undef ASM_DECLARE_FUNCTION_NAME
#define ASM_DECLARE_FUNCTION_NAME(FILE, NAME, DECL) \
@@ -238,7 +238,7 @@ while (0)
/* Output the size directive for a decl in rest_of_decl_compilation
in the case where we did not do so before the initializer.
Once we find the error_mark_node, we know that the value of
- size_directive_output was set by ASM_DECLARE_OBJECT_NAME
+ size_directive_output was set by ASM_DECLARE_OBJECT_NAME
when it was run for the same decl. */
#undef ASM_FINISH_DECLARE_OBJECT
#define ASM_FINISH_DECLARE_OBJECT(FILE, DECL, TOP_LEVEL, AT_END) \
@@ -260,11 +260,11 @@ do { \
/* Those are `generic' ways to weaken/globalize a label. We shouldn't need
to override a processor specific definition. Hence, #ifndef ASM_*
- In case overriding turns out to be needed, one can always #undef ASM_*
+ In case overriding turns out to be needed, one can always #undef ASM_*
before including this file. */
/* Tell the assembler that a symbol is weak. */
-/* Note: netbsd arm32 assembler needs a .globl here. An override may
+/* Note: netbsd arm32 assembler needs a .globl here. An override may
be needed when/if we go for arm32 support. */
#ifndef ASM_WEAKEN_LABEL
#define ASM_WEAKEN_LABEL(FILE,NAME) \
diff --git a/gcc/config/pa/pa-64.h b/gcc/config/pa/pa-64.h
index b676468..c5e8d32 100644
--- a/gcc/config/pa/pa-64.h
+++ b/gcc/config/pa/pa-64.h
@@ -35,7 +35,7 @@ along with GCC; see the file COPYING3. If not see
size_t 8 bytes
ptrdiff_t 8 bytes
wchar 4 bytes
-
+
Make GCC agree with types.h. */
#undef SIZE_TYPE
#define SIZE_TYPE "long unsigned int"
@@ -91,9 +91,9 @@ along with GCC; see the file COPYING3. If not see
the RTL to avoid scheduling related problems. For example, the
store and load could be separated by a call to a pure or const
function which has no frame and this function might also use SP-16.
- We have 14-bit immediates on the 64-bit port, so we use secondary
- memory for the copies. */
-#define PA_SECONDARY_MEMORY_NEEDED(MODE, CLASS1, CLASS2) \
- (MAYBE_FP_REG_CLASS_P (CLASS1) != FP_REG_CLASS_P (CLASS2) \
- || MAYBE_FP_REG_CLASS_P (CLASS2) != FP_REG_CLASS_P (CLASS1))
+ On the 64-bit port, I couldn't get SECONDARY_MEMORY_NEEDED to work
+ with LRA, so I modified the move patterns to use SP-40. The HP
+ compiler also uses this slot in the frame marker for moving data
+ between the general and floating-point registers. */
+#define PA_SECONDARY_MEMORY_NEEDED(MODE, CLASS1, CLASS2) false
diff --git a/gcc/config/pa/pa.cc b/gcc/config/pa/pa.cc
index 911b7d9..94ee7db 100644
--- a/gcc/config/pa/pa.cc
+++ b/gcc/config/pa/pa.cc
@@ -58,7 +58,7 @@ along with GCC; see the file COPYING3. If not see
/* This file should be included last. */
#include "target-def.h"
-/* Return nonzero if there is a bypass for the output of
+/* Return nonzero if there is a bypass for the output of
OUT_INSN and the fp store IN_INSN. */
int
pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
@@ -83,7 +83,7 @@ pa_fpstore_bypass_p (rtx_insn *out_insn, rtx_insn *in_insn)
return (GET_MODE_SIZE (store_mode) == GET_MODE_SIZE (other_mode));
}
-
+
#ifndef DO_FRAME_NOTES
#ifdef INCOMING_RETURN_ADDR_RTX
@@ -209,6 +209,7 @@ static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
static HOST_WIDE_INT pa_starting_frame_offset (void);
static section* pa_elf_select_rtx_section(machine_mode, rtx, unsigned HOST_WIDE_INT) ATTRIBUTE_UNUSED;
static void pa_atomic_assign_expand_fenv (tree *, tree *, tree *);
+static bool pa_use_lra_p (void);
/* The following extra sections are only used for SOM. */
static GTY(()) section *som_readonly_data_section;
@@ -412,7 +413,7 @@ static size_t n_deferred_plabels = 0;
#define TARGET_LEGITIMATE_ADDRESS_P pa_legitimate_address_p
#undef TARGET_LRA_P
-#define TARGET_LRA_P hook_bool_void_false
+#define TARGET_LRA_P pa_use_lra_p
#undef TARGET_HARD_REGNO_NREGS
#define TARGET_HARD_REGNO_NREGS pa_hard_regno_nregs
@@ -973,7 +974,7 @@ legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
/* During and after reload, we need to generate a REG_LABEL_OPERAND note
and update LABEL_NUSES because this is not done automatically. */
- if (reload_in_progress || reload_completed)
+ if (lra_in_progress || reload_in_progress || reload_completed)
{
/* Extract LABEL_REF. */
if (GET_CODE (orig) == CONST)
@@ -998,7 +999,7 @@ legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
/* Before reload, allocate a temporary register for the intermediate
result. This allows the sequence to be deleted when the final
result is unused and the insns are trivially dead. */
- tmp_reg = ((reload_in_progress || reload_completed)
+ tmp_reg = ((lra_in_progress || reload_in_progress || reload_completed)
? reg : gen_reg_rtx (Pmode));
if (function_label_operand (orig, VOIDmode))
@@ -1052,7 +1053,7 @@ legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
gcc_assert (reg);
gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
-
+
base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
orig = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
base == reg ? 0 : reg);
@@ -1102,7 +1103,7 @@ legitimize_tls_address (rtx addr)
if (GET_CODE (addr) != SYMBOL_REF)
return addr;
- switch (SYMBOL_REF_TLS_MODEL (addr))
+ switch (SYMBOL_REF_TLS_MODEL (addr))
{
case TLS_MODEL_GLOBAL_DYNAMIC:
tmp = gen_reg_rtx (Pmode);
@@ -1125,7 +1126,7 @@ legitimize_tls_address (rtx addr)
insn = get_insns ();
end_sequence ();
t2 = gen_reg_rtx (Pmode);
- emit_libcall_block (insn, t2, t1,
+ emit_libcall_block (insn, t2, t1,
gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
UNSPEC_TLSLDBASE));
emit_insn (gen_tld_offset_load (ret, addr, t2));
@@ -1959,11 +1960,13 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
copy_to_mode_reg (Pmode, XEXP (operand1, 0)));
if (scratch_reg
- && reload_in_progress && GET_CODE (operand0) == REG
+ && reload_in_progress
+ && GET_CODE (operand0) == REG
&& REGNO (operand0) >= FIRST_PSEUDO_REGISTER)
operand0 = reg_equiv_mem (REGNO (operand0));
else if (scratch_reg
- && reload_in_progress && GET_CODE (operand0) == SUBREG
+ && reload_in_progress
+ && GET_CODE (operand0) == SUBREG
&& GET_CODE (SUBREG_REG (operand0)) == REG
&& REGNO (SUBREG_REG (operand0)) >= FIRST_PSEUDO_REGISTER)
{
@@ -1976,11 +1979,13 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
}
if (scratch_reg
- && reload_in_progress && GET_CODE (operand1) == REG
+ && reload_in_progress
+ && GET_CODE (operand1) == REG
&& REGNO (operand1) >= FIRST_PSEUDO_REGISTER)
operand1 = reg_equiv_mem (REGNO (operand1));
else if (scratch_reg
- && reload_in_progress && GET_CODE (operand1) == SUBREG
+ && reload_in_progress
+ && GET_CODE (operand1) == SUBREG
&& GET_CODE (SUBREG_REG (operand1)) == REG
&& REGNO (SUBREG_REG (operand1)) >= FIRST_PSEUDO_REGISTER)
{
@@ -1992,12 +1997,16 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
operand1 = alter_subreg (&temp, true);
}
- if (scratch_reg && reload_in_progress && GET_CODE (operand0) == MEM
+ if (scratch_reg
+ && (lra_in_progress || reload_in_progress)
+ && GET_CODE (operand0) == MEM
&& ((tem = find_replacement (&XEXP (operand0, 0)))
!= XEXP (operand0, 0)))
operand0 = replace_equiv_address (operand0, tem);
- if (scratch_reg && reload_in_progress && GET_CODE (operand1) == MEM
+ if (scratch_reg
+ && (lra_in_progress || reload_in_progress)
+ && GET_CODE (operand1) == MEM
&& ((tem = find_replacement (&XEXP (operand1, 0)))
!= XEXP (operand1, 0)))
operand1 = replace_equiv_address (operand1, tem);
@@ -2043,8 +2052,7 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
op1 = replace_equiv_address (op1, scratch_reg);
}
}
- else if (((TARGET_ELF32 || !TARGET_PA_20)
- && symbolic_memory_operand (op1, VOIDmode))
+ else if ((!INT14_OK_STRICT && symbolic_memory_operand (op1, VOIDmode))
|| IS_LO_SUM_DLT_ADDR_P (XEXP (op1, 0))
|| IS_INDEX_ADDR_P (XEXP (op1, 0)))
{
@@ -2093,8 +2101,7 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
op0 = replace_equiv_address (op0, scratch_reg);
}
}
- else if (((TARGET_ELF32 || !TARGET_PA_20)
- && symbolic_memory_operand (op0, VOIDmode))
+ else if ((!INT14_OK_STRICT && symbolic_memory_operand (op0, VOIDmode))
|| IS_LO_SUM_DLT_ADDR_P (XEXP (op0, 0))
|| IS_INDEX_ADDR_P (XEXP (op0, 0)))
{
@@ -2220,7 +2227,7 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
&& !HARD_REGISTER_P (operand0))
copy_reg_pointer (operand0, operand1);
}
-
+
/* When MEMs are broken out, the REG_POINTER flag doesn't
get set. In some cases, we can set the REG_POINTER flag
from the declaration for the MEM. */
@@ -2257,7 +2264,7 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
else if (GET_CODE (operand0) == MEM)
{
if (mode == DFmode && operand1 == CONST0_RTX (mode)
- && !(reload_in_progress || reload_completed))
+ && !(lra_in_progress || reload_in_progress || reload_completed))
{
rtx temp = gen_reg_rtx (DFmode);
@@ -2271,7 +2278,7 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
emit_insn (gen_rtx_SET (operand0, operand1));
return 1;
}
- if (! (reload_in_progress || reload_completed))
+ if (! (lra_in_progress || reload_in_progress || reload_completed))
{
operands[0] = validize_mem (operand0);
operands[1] = operand1 = force_reg (mode, operand1);
@@ -2311,7 +2318,7 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
rtx temp, const_part;
/* Figure out what (if any) scratch register to use. */
- if (reload_in_progress || reload_completed)
+ if (lra_in_progress || reload_in_progress || reload_completed)
{
scratch_reg = scratch_reg ? scratch_reg : operand0;
/* SCRATCH_REG will hold an address and maybe the actual
@@ -2369,7 +2376,7 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
rtx_insn *insn;
rtx temp;
- if (reload_in_progress || reload_completed)
+ if (lra_in_progress || reload_in_progress || reload_completed)
{
temp = scratch_reg ? scratch_reg : operand0;
/* TEMP will hold an address and maybe the actual
@@ -2413,7 +2420,7 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
{
rtx temp, set;
- if (reload_in_progress || reload_completed)
+ if (lra_in_progress || reload_in_progress || reload_completed)
{
temp = scratch_reg ? scratch_reg : operand0;
/* TEMP will hold an address and maybe the actual
@@ -2504,7 +2511,7 @@ pa_emit_move_sequence (rtx *operands, machine_mode mode, rtx scratch_reg)
}
}
- if (reload_in_progress || reload_completed)
+ if (lra_in_progress || reload_in_progress || reload_completed)
temp = scratch_reg ? scratch_reg : operand0;
else
temp = gen_reg_rtx (mode);
@@ -2863,7 +2870,7 @@ pa_output_move_double (rtx *operands)
&& GET_CODE (operands[0]) == REG);
gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
-
+
/* No overlap between high target register and address
register. (We do this in a non-obvious way to
save a register file writeback) */
@@ -2878,7 +2885,7 @@ pa_output_move_double (rtx *operands)
operands[0] = XEXP (addr, 0);
gcc_assert (GET_CODE (operands[1]) == REG
&& GET_CODE (operands[0]) == REG);
-
+
gcc_assert (!reg_overlap_mentioned_p (high_reg, addr));
/* No overlap between high target register and address
register. (We do this in a non-obvious way to save a
@@ -3099,15 +3106,15 @@ pa_output_fp_move_double (rtx *operands)
else
{
rtx xoperands[2];
-
+
gcc_assert (operands[1] == CONST0_RTX (GET_MODE (operands[0])));
-
+
/* This is a pain. You have to be prepared to deal with an
arbitrary address here including pre/post increment/decrement.
so avoid this in the MD. */
gcc_assert (GET_CODE (operands[0]) == REG);
-
+
xoperands[1] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
xoperands[0] = operands[0];
output_asm_insn ("copy %%r0,%0\n\tcopy %%r0,%1", xoperands);
@@ -4076,7 +4083,7 @@ pa_compute_frame_size (poly_int64 size, int *fregs_live)
first slot is only used when the frame pointer is needed. */
if (size || frame_pointer_needed)
size += pa_starting_frame_offset ();
-
+
/* If the current function calls __builtin_eh_return, then we need
to allocate stack space for registers that will hold data for
the exception handler. */
@@ -4416,7 +4423,7 @@ pa_expand_prologue (void)
to do for functions which make no calls and allocate no
frame? Do we need to allocate a frame, or can we just omit
the save? For now we'll just omit the save.
-
+
We don't want a note on this insn as the frame marker can
move if there is a dynamic stack allocation. */
if (flag_pic && actual_fsize != 0 && !TARGET_64BIT)
@@ -4517,7 +4524,7 @@ load_reg (int reg, HOST_WIDE_INT disp, int base)
rtx tmpreg = gen_rtx_REG (Pmode, 1);
emit_move_insn (tmpreg, delta);
- if (TARGET_DISABLE_INDEXING)
+ if (!TARGET_NO_SPACE_REGS || TARGET_DISABLE_INDEXING)
{
emit_move_insn (tmpreg, gen_rtx_PLUS (Pmode, tmpreg, basereg));
src = gen_rtx_MEM (word_mode, tmpreg);
@@ -5227,7 +5234,7 @@ pa_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
/* A fpload can't be issued until one cycle before a
preceding arithmetic operation has finished if
the target of the fpload is the destination of the
- arithmetic operation.
+ arithmetic operation.
Exception: For PA7100LC, PA7200 and PA7300, the cost
is 3 cycles, unless they bundle together. We also
@@ -5866,7 +5873,7 @@ pa_output_global_address (FILE *file, rtx x, int round_constant)
default:
gcc_unreachable ();
}
-
+
if (!read_only_operand (base, VOIDmode) && !flag_pic)
fputs ("-$global$", file);
if (offset)
@@ -5926,7 +5933,7 @@ pa_file_start_mcount (const char *aswhat)
if (profile_flag)
fprintf (asm_out_file, "\t.IMPORT _mcount,%s\n", aswhat);
}
-
+
static void
pa_elf_file_start (void)
{
@@ -6410,24 +6417,21 @@ pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
if (regno >= FIRST_PSEUDO_REGISTER || GET_CODE (x) == SUBREG)
regno = true_regnum (x);
- /* Handle reloads for floating point loads and stores. */
- if ((regno >= FIRST_PSEUDO_REGISTER || regno == -1)
- && FP_REG_CLASS_P (rclass))
+ /* Handle reloads for floating-point loads and stores. */
+ if (regno < 0 && FP_REG_CLASS_P (rclass))
{
- if (MEM_P (x))
- {
- x = XEXP (x, 0);
+ if (REG_P (x) || GET_CODE (x) == SUBREG)
+ return NO_REGS;
- /* We don't need a secondary reload for indexed memory addresses.
+ /* We don't need a secondary reload for indexed memory addresses.
- When INT14_OK_STRICT is true, it might appear that we could
- directly allow register indirect memory addresses. However,
- this doesn't work because we don't support SUBREGs in
- floating-point register copies and reload doesn't tell us
- when it's going to use a SUBREG. */
- if (IS_INDEX_ADDR_P (x))
- return NO_REGS;
- }
+ When INT14_OK_STRICT is true, it might appear that we could
+ directly allow register indirect memory addresses. However,
+ this doesn't work because we don't support SUBREGs in
+ floating-point register copies and reload doesn't tell us
+ when it's going to use a SUBREG. */
+ if (MEM_P (x) && IS_INDEX_ADDR_P (XEXP (x, 0)))
+ return NO_REGS;
/* Request a secondary reload with a general scratch register
for everything else. ??? Could symbolic operands be handled
@@ -6444,8 +6448,14 @@ pa_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
if (rclass == SHIFT_REGS)
{
/* Handle spill. */
- if (regno >= FIRST_PSEUDO_REGISTER || regno < 0)
+ if (regno < 0)
{
+ if (REG_P (x) || GET_CODE (x) == SUBREG)
+ return GENERAL_REGS;
+
+ if (TARGET_64BIT && GET_CODE (x) == CONST_INT)
+ return GENERAL_REGS;
+
sri->icode = (in_p
? direct_optab_handler (reload_in_optab, mode)
: direct_optab_handler (reload_out_optab, mode));
@@ -7110,7 +7120,7 @@ const char *
pa_output_lbranch (rtx dest, rtx_insn *insn, int xdelay)
{
rtx xoperands[4];
-
+
xoperands[0] = dest;
/* First, free up the delay slot. */
@@ -7631,7 +7641,7 @@ pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
}
else
return "addib,%C2 %1,%0,%3";
-
+
case 8:
/* Handle weird backwards branch with a fulled delay slot
which is nullified. */
@@ -7681,7 +7691,7 @@ pa_output_dbra (rtx *operands, rtx_insn *insn, int which_alternative)
return pa_output_lbranch (operands[3], insn, xdelay);
}
-
+
}
/* Deal with gross reload from FP register case. */
else if (which_alternative == 1)
@@ -8477,7 +8487,7 @@ pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
pa_output_arg_descriptor (insn);
if (TARGET_PA_20)
return "bve,l,n (%%r22),%%r2\n\tnop";
- return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
+ return "ble 0(%%sr4,%%r22)\n\tcopy %%r31,%%r2";
}
if (TARGET_PORTABLE_RUNTIME)
@@ -8489,7 +8499,7 @@ pa_output_indirect_call (rtx_insn *insn, rtx call_dest)
}
/* Now the normal case -- we can reach $$dyncall directly or
- we're sure that we can get there via a long-branch stub.
+ we're sure that we can get there via a long-branch stub.
No need to check target flags as the length uniquely identifies
the remaining cases. */
@@ -9203,7 +9213,7 @@ pa_asm_out_destructor (rtx symbol, int priority)
The ASM_OUTPUT_ALIGNED_BSS macro needs to be defined to call this
function on the SOM port to prevent uninitialized global data from
being placed in the data section. */
-
+
void
pa_asm_output_aligned_bss (FILE *stream,
const char *name,
@@ -9369,7 +9379,7 @@ forward_branch_p (rtx_insn *insn)
gcc_assert (lab != NULL_RTX);
if (INSN_ADDRESSES_SET_P ())
- return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
+ return INSN_ADDRESSES (INSN_UID (lab)) > INSN_ADDRESSES (INSN_UID (insn));
while (insn)
{
@@ -9804,8 +9814,8 @@ pa_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
to match the HP Compiler ABI. */
static rtx
-pa_function_value (const_tree valtype,
- const_tree func ATTRIBUTE_UNUSED,
+pa_function_value (const_tree valtype,
+ const_tree func ATTRIBUTE_UNUSED,
bool outgoing ATTRIBUTE_UNUSED)
{
machine_mode valmode;
@@ -10328,7 +10338,7 @@ pa_select_section (tree exp, int reloc,
and the function is in a COMDAT group, place the plabel reference in the
.data.rel.ro.local section. The linker ignores references to symbols in
discarded sections from this section. */
-
+
static section *
pa_elf_select_rtx_section (machine_mode mode, rtx x,
unsigned HOST_WIDE_INT align)
@@ -10479,7 +10489,7 @@ pa_can_change_mode_class (machine_mode from, machine_mode to,
if (COMPLEX_MODE_P (from) || VECTOR_MODE_P (from)
|| COMPLEX_MODE_P (to) || VECTOR_MODE_P (to))
return false;
-
+
/* There is no way to load QImode or HImode values directly from memory
to a FP register. SImode loads to the FP registers are not zero
extended. On the 64-bit target, this conflicts with the definition
@@ -10500,7 +10510,7 @@ pa_can_change_mode_class (machine_mode from, machine_mode to,
}
/* Implement TARGET_MODES_TIEABLE_P.
-
+
We should return FALSE for QImode and HImode because these modes
are not ok in the floating-point registers. However, this prevents
tieing these modes to SImode and DImode in the general registers.
@@ -10877,6 +10887,7 @@ pa_legitimate_constant_p (machine_mode mode, rtx x)
if (TARGET_64BIT
&& HOST_BITS_PER_WIDE_INT > 32
&& GET_CODE (x) == CONST_INT
+ && !lra_in_progress
&& !reload_in_progress
&& !reload_completed
&& !LEGITIMATE_64BIT_CONST_INT_P (INTVAL (x))
@@ -10922,7 +10933,7 @@ pa_section_type_flags (tree decl, const char *name, int reloc)
must provide patterns for doing indexed integer stores, or the move
expanders must force the address of an indexed store to a register.
We have adopted the latter approach.
-
+
Another function of pa_legitimate_address_p is to ensure that
the base register is a valid pointer for indexed instructions.
On targets that have non-equivalent space registers, we have to
@@ -11009,17 +11020,13 @@ pa_legitimate_address_p (machine_mode mode, rtx x, bool strict, code_helper)
}
if (!TARGET_DISABLE_INDEXING
- /* Only accept the "canonical" INDEX+BASE operand order
- on targets with non-equivalent space registers. */
- && (TARGET_NO_SPACE_REGS
- ? REG_P (index)
- : (base == XEXP (x, 1) && REG_P (index)
- && (reload_completed
- || (reload_in_progress && HARD_REGISTER_P (base))
- || REG_POINTER (base))
- && (reload_completed
- || (reload_in_progress && HARD_REGISTER_P (index))
- || !REG_POINTER (index))))
+ /* Currently, the REG_POINTER flag is not set in a variety
+ of situations (e.g., call arguments and pointer arithmetic).
+ As a result, we can't reliably determine when unscaled
+ addresses are legitimate on targets that need space register
+ selection. */
+ && TARGET_NO_SPACE_REGS
+ && REG_P (index)
&& MODE_OK_FOR_UNSCALED_INDEXING_P (mode)
&& (strict ? STRICT_REG_OK_FOR_INDEX_P (index)
: REG_OK_FOR_INDEX_P (index))
@@ -11028,14 +11035,14 @@ pa_legitimate_address_p (machine_mode mode, rtx x, bool strict, code_helper)
return true;
if (!TARGET_DISABLE_INDEXING
- && GET_CODE (index) == MULT
/* Only accept base operands with the REG_POINTER flag prior to
reload on targets with non-equivalent space registers. */
&& (TARGET_NO_SPACE_REGS
- || (base == XEXP (x, 1)
- && (reload_completed
- || (reload_in_progress && HARD_REGISTER_P (base))
- || REG_POINTER (base))))
+ || reload_completed
+ || ((lra_in_progress || reload_in_progress)
+ && HARD_REGISTER_P (base))
+ || REG_POINTER (base))
+ && GET_CODE (index) == MULT
&& REG_P (XEXP (index, 0))
&& GET_MODE (XEXP (index, 0)) == Pmode
&& MODE_OK_FOR_SCALED_INDEXING_P (mode)
@@ -11063,20 +11070,21 @@ pa_legitimate_address_p (machine_mode mode, rtx x, bool strict, code_helper)
{
y = XEXP (x, 1);
- /* Needed for -fPIC */
+ /* UNSPEC_DLTIND14R is always okay. Needed for -fPIC */
if (mode == Pmode
&& GET_CODE (y) == UNSPEC)
return true;
/* Before reload, we need support for 14-bit floating
point loads and stores, and associated relocations. */
- if ((TARGET_ELF32 || !INT14_OK_STRICT)
+ if (!INT14_OK_STRICT
&& !reload_completed
&& mode != QImode
&& mode != HImode)
return false;
- if (CONSTANT_P (y))
+ if (CONSTANT_P (y)
+ || (!flag_pic && symbolic_operand (y, mode)))
return true;
}
return false;
@@ -11252,7 +11260,7 @@ pa_function_arg_size (machine_mode mode, const_tree type)
{
HOST_WIDE_INT size;
- size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
+ size = mode != BLKmode ? GET_MODE_SIZE (mode) : int_size_in_bytes (type);
/* The 64-bit runtime does not restrict the size of stack frames,
but the gcc calling conventions limit argument sizes to 1G. Our
@@ -11340,4 +11348,12 @@ pa_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
reload_fenv, restore_fnenv), update_call);
}
+/* Implement TARGET_LRA_P. */
+
+static bool
+pa_use_lra_p ()
+{
+ return pa_lra_p;
+}
+
#include "gt-pa.h"
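
Editor's note: pa.cc now reports LRA support through a real hook instead of
hook_bool_void_false. The pa_lra_p flag it reads is presumably a target option
defined in pa.opt, which is not shown in this diff, so treat that name as an
assumption. The toy model below only illustrates the before/after behaviour of
the hook; it is not GCC code.

#include <stdbool.h>
#include <stdio.h>

static bool pa_lra_p = false;   /* stand-in for the real option flag */

static bool hook_bool_void_false (void) { return false; }   /* old behaviour */
static bool pa_use_lra_p (void) { return pa_lra_p; }         /* new behaviour */

int main (void)
{
  bool (*target_lra_p) (void) = pa_use_lra_p;   /* like #define TARGET_LRA_P */
  pa_lra_p = true;                              /* e.g. an -mlra style switch */
  printf ("use LRA: %s (old hook would say %s)\n",
          target_lra_p () ? "yes" : "no",
          hook_bool_void_false () ? "yes" : "no");
  return 0;
}
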
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index 7e45c35..fa6d05e 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -226,7 +226,7 @@ typedef struct GTY(()) machine_function
} machine_function;
/* Define this macro if it is advisable to hold scalars in registers
- in a wider mode than that declared by the program. In such cases,
+ in a wider mode than that declared by the program. In such cases,
the value is constrained to be within the bounds of the declared
type, but kept valid in the wider mode. The signedness of the
extension may differ from that of the type. */
@@ -260,7 +260,7 @@ typedef struct GTY(()) machine_function
This needs to be 8 when TARGET_64BIT is true to allow building various
TImode routines in libgcc. However, we also need the DImode DIVMOD
routines because they are not currently implemented in pa.md.
-
+
The HP runtime specification doesn't provide the alignment requirements
and calling conventions for TImode variables. */
#ifdef IN_LIBGCC2
@@ -480,6 +480,9 @@ extern rtx hppa_pic_save_rtx (void);
#define INDEX_REG_CLASS GENERAL_REGS
#define BASE_REG_CLASS GENERAL_REGS
+/* True if register is a general register. */
+#define GENERAL_REGNO_P(N) ((N) >= 1 && (N) <= 31)
+
#define FP_REG_CLASS_P(CLASS) \
((CLASS) == FP_REGS || (CLASS) == FPUPPER_REGS)
@@ -564,13 +567,13 @@ extern rtx hppa_pic_save_rtx (void);
of arguments scanned so far (including the invisible argument,
if any, which holds the structure-value-address). Thus, 4 or
more means all following args should go on the stack.
-
+
The INCOMING field tracks whether this is an "incoming" or
"outgoing" argument.
-
+
The INDIRECT field indicates whether this is an indirect
call or not.
-
+
The NARGS_PROTOTYPE field indicates that an argument does not
have a prototype when it less than or equal to 0. */
@@ -712,7 +715,7 @@ extern int may_call_alloca;
#define MIN_CACHELINE_SIZE 32
-/* Addressing modes, and classification of registers for them.
+/* Addressing modes, and classification of registers for them.
Using autoincrement addressing modes on PA8000 class machines is
not profitable. */
@@ -970,7 +973,7 @@ do { \
/* Higher than the default as we prefer to use simple move insns
(better scheduling and delay slot filling) and because our
- built-in block move is really a 2X unrolled loop.
+ built-in block move is really a 2X unrolled loop.
Believe it or not, this has to be big enough to allow for copying all
arguments passed in registers to avoid infinite recursion during argument
@@ -1163,7 +1166,7 @@ do { \
#define ASM_OUTPUT_ADDR_VEC_ELT(FILE, VALUE) \
fprintf (FILE, "\t.word L$%d\n", VALUE)
-/* This is how to output an element of a case-vector that is relative.
+/* This is how to output an element of a case-vector that is relative.
Since we always place jump tables in the text section, the difference
is absolute and requires no relocation. */
@@ -1197,7 +1200,7 @@ do { \
#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
pa_asm_output_aligned_bss (FILE, NAME, SIZE, ALIGN)
-
+
/* This says how to output an assembler line to define a global common symbol
with size SIZE (in bytes) and alignment ALIGN (in bits). */
@@ -1211,7 +1214,7 @@ do { \
#define ASM_OUTPUT_ALIGNED_LOCAL(FILE, NAME, SIZE, ALIGN) \
pa_asm_output_aligned_local (FILE, NAME, SIZE, ALIGN)
-
+
/* All HP assemblers use "!" to separate logical lines. */
#define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == '!')
@@ -1295,7 +1298,7 @@ do { \
instructions for non-PIC and PIC, respectively. Import stubs are
seven and five instructions for HP-UX and ELF targets, respectively.
The default stub group size for ELF targets is 217856 bytes.
- FIXME: We need an option to set the maximum offset. */
+ FIXME: We need an option to set the maximum offset. */
#define MAX_PCREL17F_OFFSET (TARGET_HPUX ? 198164 : 217856)
#define NEED_INDICATE_EXEC_STACK 0
diff --git a/gcc/config/pa/pa.md b/gcc/config/pa/pa.md
index 9e410f4..bf59b7f 100644
--- a/gcc/config/pa/pa.md
+++ b/gcc/config/pa/pa.md
@@ -2222,9 +2222,9 @@
(define_insn ""
[(set (match_operand:SI 0 "move_dest_operand"
- "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T")
+ "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T,?r,?*f")
(match_operand:SI 1 "move_src_operand"
- "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f"))]
+ "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f,*f,r"))]
"(register_operand (operands[0], SImode)
|| reg_or_0_operand (operands[1], SImode))
&& !TARGET_SOFT_FLOAT
@@ -2241,10 +2241,12 @@
{mfctl|mfctl,w} %%sar,%0
fcpy,sgl %f1,%0
fldw%F1 %1,%0
- fstw%F0 %1,%0"
- [(set_attr "type" "load,move,move,move,shift,load,store,move,move,fpalu,fpload,fpstore")
+ fstw%F0 %1,%0
+ fstw %1,-40(%%sp)\n\tldw -40(%%sp),%0
+ stw %1,-40(%%sp)\n\tfldw -40(%%sp),%0"
+ [(set_attr "type" "load,move,move,move,shift,load,store,move,move,fpalu,fpload,fpstore,fpstore_load,store_fpload")
(set_attr "pa_combine_type" "addmove")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8,8")])
(define_insn ""
[(set (match_operand:SI 0 "move_dest_operand"
@@ -2280,6 +2282,58 @@
(set_attr "pa_combine_type" "addmove")
(set_attr "length" "4")])
+; Rewrite RTL using a REG+D store. This will allow the insn that
+; computes the address to be deleted if the register it sets is dead.
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (mem:SI (match_dup 0))
+ (match_operand:SI 3 "register_operand" ""))]
+ "!TARGET_64BIT
+ && !INT14_OK_STRICT
+ && GENERAL_REGNO_P (REGNO (operands[0]))
+ && GENERAL_REGNO_P (REGNO (operands[3]))
+ && REGNO (operands[0]) != REGNO (operands[3])
+ && base14_operand (operands[2], E_SImode)"
+ [(set (mem:SI (plus:SI (match_dup 1) (match_dup 2))) (match_dup 3))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]
+ "")
+
+; Rewrite RTL using a REG+D load. This will allow the insn that
+; computes the address to be deleted if the register it sets is dead.
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand:SI 3 "register_operand" "")
+ (mem:SI (match_dup 0)))]
+ "!TARGET_64BIT
+ && !INT14_OK_STRICT
+ && GENERAL_REGNO_P (REGNO (operands[0]))
+ && GENERAL_REGNO_P (REGNO (operands[3]))
+ && REGNO (operands[0]) != REGNO (operands[3])
+ && REGNO (operands[1]) != REGNO (operands[3])
+ && base14_operand (operands[2], E_SImode)"
+ [(set (match_dup 3) (mem:SI (plus:SI (match_dup 1) (match_dup 2))))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand:SI 3 "register_operand" "")
+ (mem:SI (match_dup 0)))]
+ "!TARGET_64BIT
+ && !INT14_OK_STRICT
+ && GENERAL_REGNO_P (REGNO (operands[0]))
+ && GENERAL_REGNO_P (REGNO (operands[3]))
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && base14_operand (operands[2], E_SImode)"
+ [(set (match_dup 3) (mem:SI (plus:SI (match_dup 1) (match_dup 2))))]
+ "")
+
; Rewrite RTL using an indexed store. This will allow the insn that
; computes the address to be deleted if the register it sets is dead.
(define_peephole2
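The three REG+D peepholes added above all perform the same rewrite: the memory access absorbs the base-plus-displacement address, and the address computation is re-emitted afterwards so that later passes can delete it when its result is dead. A hypothetical assembly-level illustration of the store case (register numbers and the 4096 displacement are invented for the example; the displacement must satisfy base14_operand):

    /* Before the peephole:            After the peephole:
         ldo 4096(%r3),%r1               stw %r4,4096(%r3)
         stw %r4,0(%r1)                  ldo 4096(%r3),%r1   <- deleted if %r1 is dead
       The load variants work the same way; their extra REGNO checks keep the
       loaded register from clobbering the base before it is used.  */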
@@ -3866,7 +3920,7 @@
(define_insn ""
[(set (match_operand:DF 0 "move_dest_operand"
"=f,*r,T,?o,?Q,f,*r,*r,?*r,?f")
- (match_operand:DF 1 "reg_or_0_or_nonsymb_mem_operand"
+ (match_operand:DF 1 "reg_or_0_or_mem_operand"
"fG,*rG,f,*r,*r,RT,o,RQ,f,*r"))]
"(register_operand (operands[0], DFmode)
|| reg_or_0_operand (operands[1], DFmode))
@@ -4040,7 +4094,7 @@
(define_insn ""
[(set (match_operand:DF 0 "move_dest_operand"
"=r,?o,?Q,r,r")
- (match_operand:DF 1 "reg_or_0_or_nonsymb_mem_operand"
+ (match_operand:DF 1 "reg_or_0_or_mem_operand"
"rG,r,r,o,RQ"))]
"(register_operand (operands[0], DFmode)
|| reg_or_0_operand (operands[1], DFmode))
@@ -4055,9 +4109,9 @@
(define_insn ""
[(set (match_operand:DF 0 "move_dest_operand"
- "=!*r,*r,*r,*r,*r,Q,f,f,T")
+ "=!*r,*r,*r,*r,*r,Q,f,f,T,?*r,?f")
(match_operand:DF 1 "move_src_operand"
- "!*rG,J,N,K,RQ,*rG,fG,RT,f"))]
+ "!*rG,J,N,K,RQ,*rG,fG,RT,f,f,*r"))]
"(register_operand (operands[0], DFmode)
|| reg_or_0_operand (operands[1], DFmode))
&& !TARGET_SOFT_FLOAT && TARGET_64BIT"
@@ -4070,10 +4124,12 @@
std%M0 %r1,%0
fcpy,dbl %f1,%0
fldd%F1 %1,%0
- fstd%F0 %1,%0"
- [(set_attr "type" "move,move,move,shift,load,store,fpalu,fpload,fpstore")
+ fstd%F0 %1,%0
+ fstd %1,-40(%%sp)\n\tldd -40(%%sp),%0
+ std %1,-40(%%sp)\n\tfldd -40(%%sp),%0"
+ [(set_attr "type" "move,move,move,shift,load,store,fpalu,fpload,fpstore,fpstore_load,store_fpload")
(set_attr "pa_combine_type" "addmove")
- (set_attr "length" "4,4,4,4,4,4,4,4,4")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,8,8")])
(define_insn ""
[(set (match_operand:DF 0 "move_dest_operand"
@@ -4229,9 +4285,9 @@
(define_insn ""
[(set (match_operand:DI 0 "move_dest_operand"
- "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T")
+ "=r,r,r,r,r,r,Q,!*q,!r,!*f,*f,T,?r,?*f")
(match_operand:DI 1 "move_src_operand"
- "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f"))]
+ "A,r,J,N,K,RQ,rM,!rM,!*q,!*fM,RT,*f,*f,r"))]
"(register_operand (operands[0], DImode)
|| reg_or_0_operand (operands[1], DImode))
&& !TARGET_SOFT_FLOAT && TARGET_64BIT"
@@ -4247,10 +4303,12 @@
{mfctl|mfctl,w} %%sar,%0
fcpy,dbl %f1,%0
fldd%F1 %1,%0
- fstd%F0 %1,%0"
- [(set_attr "type" "load,move,move,move,shift,load,store,move,move,fpalu,fpload,fpstore")
+ fstd%F0 %1,%0
+ fstd %1,-40(%%sp)\n\tldd -40(%%sp),%0
+ std %1,-40(%%sp)\n\tfldd -40(%%sp),%0"
+ [(set_attr "type" "load,move,move,move,shift,load,store,move,move,fpalu,fpload,fpstore,fpstore_load,store_fpload")
(set_attr "pa_combine_type" "addmove")
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8,8")])
(define_insn ""
[(set (match_operand:DI 0 "move_dest_operand"
@@ -4440,7 +4498,7 @@
(define_insn ""
[(set (match_operand:SF 0 "move_dest_operand"
"=f,!*r,f,*r,T,Q,?*r,?f")
- (match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand"
+ (match_operand:SF 1 "reg_or_0_or_mem_operand"
"fG,!*rG,RT,RQ,f,*rG,f,*r"))]
"(register_operand (operands[0], SFmode)
|| reg_or_0_operand (operands[1], SFmode))
@@ -4461,9 +4519,9 @@
(define_insn ""
[(set (match_operand:SF 0 "move_dest_operand"
- "=f,!*r,f,*r,T,Q")
- (match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand"
- "fG,!*rG,RT,RQ,f,*rG"))]
+ "=f,!*r,f,*r,T,Q,?*r,?f")
+ (match_operand:SF 1 "reg_or_0_or_mem_operand"
+ "fG,!*rG,RT,RQ,f,*rG,f,*r"))]
"(register_operand (operands[0], SFmode)
|| reg_or_0_operand (operands[1], SFmode))
&& !TARGET_SOFT_FLOAT
@@ -4474,15 +4532,17 @@
fldw%F1 %1,%0
ldw%M1 %1,%0
fstw%F0 %1,%0
- stw%M0 %r1,%0"
- [(set_attr "type" "fpalu,move,fpload,load,fpstore,store")
+ stw%M0 %r1,%0
+ fstw %1,-40(%%sp)\n\tldw -40(%%sp),%0
+ stw %1,-40(%%sp)\n\tfldw -40(%%sp),%0"
+ [(set_attr "type" "fpalu,move,fpload,load,fpstore,store,fpstore_load,store_fpload")
(set_attr "pa_combine_type" "addmove")
- (set_attr "length" "4,4,4,4,4,4")])
+ (set_attr "length" "4,4,4,4,4,4,8,8")])
(define_insn ""
[(set (match_operand:SF 0 "move_dest_operand"
"=!*r,*r,Q")
- (match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand"
+ (match_operand:SF 1 "reg_or_0_or_mem_operand"
"!*rG,RQ,*rG"))]
"(register_operand (operands[0], SFmode)
|| reg_or_0_operand (operands[1], SFmode))
@@ -4509,6 +4569,54 @@
(define_peephole2
[(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (mem:SF (match_dup 0))
+ (match_operand:SF 3 "register_operand" ""))]
+ "!TARGET_64BIT
+ && !INT14_OK_STRICT
+ && GENERAL_REGNO_P (REGNO (operands[0]))
+ && GENERAL_REGNO_P (REGNO (operands[3]))
+ && REGNO (operands[0]) != REGNO (operands[3])
+ && base14_operand (operands[2], E_SImode)"
+ [(set (mem:SF (plus:SI (match_dup 1) (match_dup 2))) (match_dup 3))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand:SF 3 "register_operand" "")
+ (mem:SF (match_dup 0)))]
+ "!TARGET_64BIT
+ && !INT14_OK_STRICT
+ && GENERAL_REGNO_P (REGNO (operands[0]))
+ && GENERAL_REGNO_P (REGNO (operands[3]))
+ && REGNO (operands[0]) != REGNO (operands[3])
+ && REGNO (operands[1]) != REGNO (operands[3])
+ && base14_operand (operands[2], E_SImode)"
+ [(set (match_dup 3) (mem:SF (plus:SI (match_dup 1) (match_dup 2))))
+ (set (match_dup 0) (plus:SI (match_dup 1) (match_dup 2)))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
+ (plus:SI (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "const_int_operand" "")))
+ (set (match_operand:SF 3 "register_operand" "")
+ (mem:SF (match_dup 0)))]
+ "!TARGET_64BIT
+ && !INT14_OK_STRICT
+ && GENERAL_REGNO_P (REGNO (operands[0]))
+ && GENERAL_REGNO_P (REGNO (operands[3]))
+ && REGNO (operands[0]) == REGNO (operands[3])
+ && base14_operand (operands[2], E_SImode)"
+ [(set (match_dup 3) (mem:SF (plus:SI (match_dup 1) (match_dup 2))))]
+ "")
+
+(define_peephole2
+ [(set (match_operand:SI 0 "register_operand" "")
(plus:SI (ashift:SI (match_operand:SI 1 "register_operand" "")
(const_int 2))
(match_operand:SI 2 "register_operand" "")))
@@ -4615,7 +4723,7 @@
(define_insn ""
[(set (match_operand:SF 0 "move_dest_operand"
"=r,r,Q")
- (match_operand:SF 1 "reg_or_0_or_nonsymb_mem_operand"
+ (match_operand:SF 1 "reg_or_0_or_mem_operand"
"rG,RQ,rG"))]
"(register_operand (operands[0], SFmode)
|| reg_or_0_operand (operands[1], SFmode))
@@ -7311,7 +7419,6 @@
/* Ensure the frame pointer move is not optimized. */
emit_insn (gen_blockage ());
emit_clobber (hard_frame_pointer_rtx);
- emit_clobber (frame_pointer_rtx);
emit_move_insn (hard_frame_pointer_rtx, fp);
emit_use (hard_frame_pointer_rtx);
@@ -7326,7 +7433,7 @@
})
(define_insn "indirect_goto"
- [(unspec [(match_operand 0 "register_operand" "=r")] UNSPEC_GOTO)]
+ [(unspec [(match_operand 0 "register_operand" "r")] UNSPEC_GOTO)]
"GET_MODE (operands[0]) == word_mode"
"bv%* %%r0(%0)"
[(set_attr "type" "branch")
@@ -9102,7 +9209,6 @@ add,l %2,%3,%3\;bv,n %%r0(%3)"
/* Ensure the frame pointer move is not optimized. */
emit_insn (gen_blockage ());
emit_clobber (hard_frame_pointer_rtx);
- emit_clobber (frame_pointer_rtx);
emit_move_insn (hard_frame_pointer_rtx, fp);
emit_use (hard_frame_pointer_rtx);
diff --git a/gcc/config/pa/pa.opt b/gcc/config/pa/pa.opt
index 6863f91..d4b3063 100644
--- a/gcc/config/pa/pa.opt
+++ b/gcc/config/pa/pa.opt
@@ -86,6 +86,10 @@ mlong-calls
Target Mask(LONG_CALLS)
Always generate long calls.
+mlra
+Target Var(pa_lra_p) Init(0)
+Use LRA instead of reload (transitional).
+
mlong-load-store
Target Mask(LONG_LOAD_STORE)
Emit long load/store sequences.
diff --git a/gcc/config/pa/pa.opt.urls b/gcc/config/pa/pa.opt.urls
index 5b8bceb..5516332 100644
--- a/gcc/config/pa/pa.opt.urls
+++ b/gcc/config/pa/pa.opt.urls
@@ -36,6 +36,8 @@ UrlSuffix(gcc/HPPA-Options.html#index-mlinker-opt)
mlong-calls
UrlSuffix(gcc/HPPA-Options.html#index-mlong-calls-5)
+; skipping UrlSuffix for 'mlra' due to finding no URLs
+
mlong-load-store
UrlSuffix(gcc/HPPA-Options.html#index-mlong-load-store)
diff --git a/gcc/config/pa/pa32-regs.h b/gcc/config/pa/pa32-regs.h
index 6485ab2..3467e03 100644
--- a/gcc/config/pa/pa32-regs.h
+++ b/gcc/config/pa/pa32-regs.h
@@ -318,7 +318,7 @@ enum reg_class { NO_REGS, R1_REGS, GENERAL_REGS, FPUPPER_REGS, FP_REGS,
/* 1 if N is a possible register number for function argument passing. */
#define FUNCTION_ARG_REGNO_P(N) \
- (((N) >= 23 && (N) <= 26) || (! TARGET_SOFT_FLOAT && (N) >= 32 && (N) <= 39))
+ (((N) >= 23 && (N) <= 26) || (! TARGET_SOFT_FLOAT && (N) >= 32 && (N) <= 39))
/* How to refer to registers in assembler output.
This sequence is indexed by compiler's hard-register-number (see above). */
diff --git a/gcc/config/pa/predicates.md b/gcc/config/pa/predicates.md
index 50dffa1..0defd22 100644
--- a/gcc/config/pa/predicates.md
+++ b/gcc/config/pa/predicates.md
@@ -300,7 +300,7 @@
(define_predicate "integer_store_memory_operand"
(match_code "reg,mem")
{
- if (reload_in_progress
+ if ((lra_in_progress || reload_in_progress)
&& REG_P (op)
&& REGNO (op) >= FIRST_PSEUDO_REGISTER
&& reg_renumber [REGNO (op)] < 0)
@@ -312,7 +312,7 @@
REG+D instructions in pa_emit_move_sequence. Further, the Q
constraint is used in more than simple move instructions. So,
we must return true and let reload handle the reload. */
- if (reload_in_progress)
+ if (lra_in_progress || reload_in_progress)
return true;
/* Extract CONST_INT operand. */
@@ -326,7 +326,8 @@
if (!MEM_P (op))
return false;
- return ((reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
+ return ((lra_in_progress || reload_in_progress
+ || memory_address_p (mode, XEXP (op, 0)))
&& !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
&& !IS_INDEX_ADDR_P (XEXP (op, 0)));
})
@@ -335,17 +336,18 @@
;; floating point store. This also implies the operand could be used as
;; the source operand of a floating point load. LO_SUM DLT and indexed
;; memory operands are not allowed. Symbolic operands are accepted for
-;; PA 2.0 when TARGET_ELF32 is not true. We accept reloading pseudos
-;; and other memory; operands.
+;; PA 2.0. We accept reloading pseudos and other memory operands.
-;; FIXME: The GNU ELF32 linker clobbers the LSB of the FP register number
-;; in PA 2.0 {fldw,fstw} insns with long displacements. This is because
-;; R_PARISC_DPREL14WR and other relocations like it are not supported.
+;; NOTE: The GNU ELF32 linker clobbered the least significant bit of
+;; the target floating-point register in PA 2.0 floating-point loads
+;; and stores with long displacements in ld versions prior to 2.42.
+;; The global pointer also was not double-word aligned. This broke
+;; various DPREL relocations.
(define_predicate "floating_point_store_memory_operand"
(match_code "reg,mem")
{
- if (reload_in_progress
+ if ((lra_in_progress || reload_in_progress)
&& REG_P (op)
&& REGNO (op) >= FIRST_PSEUDO_REGISTER
&& reg_renumber [REGNO (op)] < 0)
@@ -365,9 +367,9 @@
if (!MEM_P (op))
return false;
- return ((reload_in_progress || memory_address_p (mode, XEXP (op, 0)))
- && !((TARGET_ELF32 || !TARGET_PA_20)
- && symbolic_memory_operand (op, VOIDmode))
+ return ((lra_in_progress || reload_in_progress
+ || memory_address_p (mode, XEXP (op, 0)))
+ && (INT14_OK_STRICT || !symbolic_memory_operand (op, VOIDmode))
&& !IS_LO_SUM_DLT_ADDR_P (XEXP (op, 0))
&& !IS_INDEX_ADDR_P (XEXP (op, 0)));
})
@@ -467,9 +469,9 @@
return memory_address_p (mode, XEXP (op, 0));
})
-;; True iff OP is not a symbolic memory operand.
+;; True iff OP is a valid memory operand.
-(define_predicate "nonsymb_mem_operand"
+(define_predicate "mem_operand"
(match_code "subreg,mem")
{
if (GET_CODE (op) == SUBREG)
@@ -488,8 +490,7 @@
&& REG_P (XEXP (XEXP (op, 0), 1)))
return false;
- return (!symbolic_memory_operand (op, mode)
- && memory_address_p (mode, XEXP (op, 0)));
+ return (memory_address_p (mode, XEXP (op, 0)));
})
;; True iff OP is anything other than a hard register.
@@ -556,7 +557,7 @@
if (register_operand (op, mode))
return true;
- if (!reload_in_progress && !reload_completed)
+ if (!lra_in_progress && !reload_in_progress && !reload_completed)
return false;
if (! MEM_P (op))
@@ -576,11 +577,11 @@
(ior (match_operand 0 "register_operand")
(match_operand 0 "const_0_operand")))
-;; True iff OP is either a register, zero, or a non-symbolic memory operand.
+;; True iff OP is either a register, zero, or a memory operand.
-(define_predicate "reg_or_0_or_nonsymb_mem_operand"
+(define_predicate "reg_or_0_or_mem_operand"
(ior (match_operand 0 "reg_or_0_operand")
- (match_operand 0 "nonsymb_mem_operand")))
+ (match_operand 0 "mem_operand")))
;; Accept REG and any CONST_INT that can be moved in one instruction
;; into a general register.
diff --git a/gcc/config/pa/som.h b/gcc/config/pa/som.h
index 1039a6a..39fdefd 100644
--- a/gcc/config/pa/som.h
+++ b/gcc/config/pa/som.h
@@ -25,7 +25,7 @@ along with GCC; see the file COPYING3. If not see
linked executables and shared libraries. */
#define LDD_SUFFIX "chatr"
/* Look for lines like "dynamic /usr/lib/X11R5/libX11.sl"
- or "static /usr/lib/X11R5/libX11.sl".
+ or "static /usr/lib/X11R5/libX11.sl".
HPUX 10.20 also has lines like "static branch prediction ..."
so we filter that out explicitly.
@@ -357,7 +357,7 @@ do { \
#define GTHREAD_USE_WEAK 0
/* Shared library suffix. Collect2 strips the version string after
- this suffix when generating constructor/destructor names. */
+ this suffix when generating constructor/destructor names. */
#define SHLIB_SUFFIX ".sl"
/* We don't have named sections. */
diff --git a/gcc/config/pdp11/pdp11.cc b/gcc/config/pdp11/pdp11.cc
index 084af21..600a4f3 100644
--- a/gcc/config/pdp11/pdp11.cc
+++ b/gcc/config/pdp11/pdp11.cc
@@ -50,7 +50,7 @@ along with GCC; see the file COPYING3. If not see
/* This file should be included last. */
#include "target-def.h"
-/* this is the current value returned by the macro FIRST_PARM_OFFSET
+/* this is the current value returned by the macro FIRST_PARM_OFFSET
defined in tm.h */
int current_first_parm_offset;
@@ -220,7 +220,7 @@ static bool pdp11_scalar_mode_supported_p (scalar_mode);
#undef TARGET_SECONDARY_RELOAD
#define TARGET_SECONDARY_RELOAD pdp11_secondary_reload
-#undef TARGET_REGISTER_MOVE_COST
+#undef TARGET_REGISTER_MOVE_COST
#define TARGET_REGISTER_MOVE_COST pdp11_register_move_cost
#undef TARGET_PREFERRED_RELOAD_CLASS
@@ -327,7 +327,7 @@ pdp11_saved_regno (unsigned regno)
alloca storage if any. */
void
pdp11_expand_prologue (void)
-{
+{
HOST_WIDE_INT fsize = get_frame_size ();
unsigned regno;
rtx x, via_ac = NULL;
@@ -339,7 +339,7 @@ pdp11_expand_prologue (void)
emit_insn (gen_setd ());
emit_insn (gen_seti ());
}
-
+
/* Save CPU registers. */
for (regno = R0_REGNUM; regno <= PC_REGNUM; regno++)
if (pdp11_saved_regno (regno))
@@ -350,7 +350,7 @@ pdp11_expand_prologue (void)
}
/* Save FPU registers. */
- for (regno = AC0_REGNUM; regno <= AC3_REGNUM; regno++)
+ for (regno = AC0_REGNUM; regno <= AC3_REGNUM; regno++)
if (pdp11_saved_regno (regno))
{
x = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
@@ -388,7 +388,7 @@ pdp11_expand_prologue (void)
void
pdp11_expand_epilogue (void)
-{
+{
HOST_WIDE_INT fsize = get_frame_size ();
unsigned regno;
rtx x, reg, via_ac = NULL;
@@ -476,13 +476,13 @@ pdp11_expand_operands (rtx *operands, rtx exops[][2],
bool sameoff = false;
enum { REGOP, OFFSOP, MEMOP, PUSHOP, POPOP, CNSTOP, RNDOP } optype;
long sval[2];
-
+
/* If either piece order is accepted and one is pre-decrement
while the other is post-increment, set order to be high order
word first. That will force the pre-decrement to be turned
into a pointer adjust, then offset addressing.
Otherwise, if either operand uses pre-decrement, that means
- the order is low order first.
+ the order is low order first.
Otherwise, if both operands are registers and destination is
higher than source and they overlap, do low order word (highest
register number) first. */
@@ -512,7 +512,7 @@ pdp11_expand_operands (rtx *operands, rtx exops[][2],
the push increases the offset to each source word.
In theory there are other cases like this, for example dest == pop,
but those don't occur in real life so ignore those. */
- if (GET_CODE (operands[0]) == MEM
+ if (GET_CODE (operands[0]) == MEM
&& GET_CODE (XEXP (operands[0], 0)) == PRE_DEC
&& REGNO (XEXP (XEXP (operands[0], 0), 0)) == STACK_POINTER_REGNUM
&& reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
@@ -529,7 +529,7 @@ pdp11_expand_operands (rtx *operands, rtx exops[][2],
else
gcc_assert (useorder == either || useorder == order);
-
+
for (op = 0; op < opcount; op++)
{
/* First classify the operand. */
@@ -553,10 +553,10 @@ pdp11_expand_operands (rtx *operands, rtx exops[][2],
supposed to allow to happen. Return failure for such cases. */
if (optype == RNDOP)
return false;
-
+
if (action != NULL)
action[op] = no_action;
-
+
/* If the operand uses pre-decrement addressing but we
want to get the parts high order first,
decrement the former register explicitly
@@ -569,7 +569,7 @@ pdp11_expand_operands (rtx *operands, rtx exops[][2],
XEXP (XEXP (operands[op], 0), 0));
optype = OFFSOP;
}
- /* If the operand uses post-increment mode but we want
+ /* If the operand uses post-increment mode but we want
to get the parts low order first, change the operand
into ordinary indexing and remember to increment
the register explicitly when we're done. */
@@ -588,7 +588,7 @@ pdp11_expand_operands (rtx *operands, rtx exops[][2],
REAL_VALUE_TO_TARGET_DOUBLE
(*CONST_DOUBLE_REAL_VALUE (operands[op]), sval);
}
-
+
for (i = 0; i < words; i++)
{
if (order == big)
@@ -633,18 +633,18 @@ output_move_multiple (rtx *operands)
rtx inops[2];
rtx exops[4][2];
rtx adjops[2];
-
+
pdp11_action action[2];
int i, words;
-
+
words = GET_MODE_BITSIZE (GET_MODE (operands[0])) / 16;
adjops[1] = gen_rtx_CONST_INT (HImode, words * 2);
inops[0] = operands[0];
inops[1] = operands[1];
-
+
pdp11_expand_operands (inops, exops, 2, words, action, either);
-
+
/* Check for explicit decrement before. */
if (action[0] == dec_before)
{
@@ -686,7 +686,7 @@ pdp11_gen_int_label (char *label, const char *prefix, int num)
else
sprintf (label, "*%s_%u", prefix, num);
}
-
+
/* Output an ascii string. */
void
output_ascii (FILE *file, const char *p, int size)
@@ -694,7 +694,7 @@ output_ascii (FILE *file, const char *p, int size)
int i, c;
const char *pseudo = "\t.ascii\t";
bool delim = false;
-
+
if (TARGET_DEC_ASM)
{
if (p[size - 1] == '\0')
@@ -769,7 +769,7 @@ pdp11_asm_output_var (FILE *file, const char *name, int size,
assemble_name (file, name);
fputs (":", file);
ASM_OUTPUT_SKIP (file, size);
- }
+ }
}
/* Special format operators handled here:
@@ -782,7 +782,7 @@ static void
pdp11_asm_print_operand (FILE *file, rtx x, int code)
{
long sval[2];
-
+
if (code == '#')
{
if (TARGET_DEC_ASM)
@@ -955,7 +955,7 @@ pdp11_lra_p (void)
/* Register to register moves are cheap if both are general
registers. */
-static int
+static int
pdp11_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
reg_class_t c1, reg_class_t c2)
{
@@ -979,7 +979,7 @@ pdp11_rtx_costs (rtx x, machine_mode mode, int outer_code,
const int asize = (mode == QImode) ? 2 : GET_MODE_SIZE (mode);
rtx src, dest;
const char *fmt;
-
+
switch (code)
{
case CONST_INT:
@@ -1026,7 +1026,7 @@ pdp11_rtx_costs (rtx x, machine_mode mode, int outer_code,
if (GET_RTX_LENGTH (code) > 1)
src = XEXP (x, 1);
dest = XEXP (x, 0);
-
+
/* If optimizing for size, claim everything costs 2 per word, plus
whatever the operands require. */
if (!speed)
@@ -1070,7 +1070,7 @@ pdp11_rtx_costs (rtx x, machine_mode mode, int outer_code,
case DIV:
*total = 10 * asize * asize;
break;
-
+
case MOD:
/* Fake value because it's accounted for under DIV, since we
use a divmod pattern. */
@@ -1085,14 +1085,14 @@ pdp11_rtx_costs (rtx x, machine_mode mode, int outer_code,
case of a one bit shift. */
*total = asize;
break;
-
+
default:
*total = asize;
break;
}
}
}
-
+
/* Now see if we're looking at a SET. If yes, then look at the
source to see if this is a move or an arithmetic operation, and
continue accordingly to handle the operands. */
@@ -1142,7 +1142,7 @@ pdp11_addr_cost (rtx addr, machine_mode mode, addr_space_t as ATTRIBUTE_UNUSED,
bool speed)
{
int cost = 0;
-
+
if (GET_CODE (addr) != REG)
{
if (!simple_memory_operand (addr, mode))
@@ -1184,7 +1184,7 @@ pdp11_insn_cost (rtx_insn *insn, bool speed)
the actual operation plus a clobber, or the implicit compare plus
the actual operation. Find the actual operation. */
pat = PATTERN (insn);
-
+
if (GET_CODE (pat) == PARALLEL)
{
set = XVECEXP (pat, 0, 0);
@@ -1199,7 +1199,7 @@ pdp11_insn_cost (rtx_insn *insn, bool speed)
if (GET_CODE (set) != SET)
return 0;
}
-
+
/* Pick up the SET source and destination RTL. */
dest = XEXP (set, 0);
src = XEXP (set, 1);
@@ -1242,7 +1242,7 @@ pdp11_insn_cost (rtx_insn *insn, bool speed)
src2 = XEXP (src, 1);
base_cost += pdp11_addr_cost (src2, mode, ADDR_SPACE_GENERIC, speed);
}
-
+
return base_cost;
}
@@ -1354,7 +1354,7 @@ simple_memory_operand(rtx op, machine_mode mode ATTRIBUTE_UNUSED)
/* Decode the address now. */
indirection:
-
+
addr = XEXP (op, 0);
switch (GET_CODE (addr))
@@ -1362,27 +1362,27 @@ simple_memory_operand(rtx op, machine_mode mode ATTRIBUTE_UNUSED)
case REG:
/* (R0) - no extra cost */
return 1;
-
+
case PRE_DEC:
case POST_INC:
case PRE_MODIFY:
case POST_MODIFY:
/* -(R0), (R0)+ - cheap! */
return 1;
-
+
case MEM:
- /* cheap - is encoded in addressing mode info!
+ /* cheap - is encoded in addressing mode info!
-- except for @(R0), which has to be @0(R0) !!! */
if (GET_CODE (XEXP (addr, 0)) == REG)
return 0;
-
+
op=addr;
goto indirection;
-
+
case CONST_INT:
- case LABEL_REF:
+ case LABEL_REF:
case CONST:
case SYMBOL_REF:
/* @#address - extra cost */
@@ -1395,7 +1395,7 @@ simple_memory_operand(rtx op, machine_mode mode ATTRIBUTE_UNUSED)
default:
break;
}
-
+
return FALSE;
}
@@ -1412,7 +1412,7 @@ no_side_effect_operand(rtx op, machine_mode mode ATTRIBUTE_UNUSED)
/* Decode the address now. */
indirection:
-
+
addr = XEXP (op, 0);
switch (GET_CODE (addr))
@@ -1420,26 +1420,26 @@ no_side_effect_operand(rtx op, machine_mode mode ATTRIBUTE_UNUSED)
case REG:
/* (R0) - no extra cost */
return 1;
-
+
case PRE_DEC:
case POST_INC:
case PRE_MODIFY:
case POST_MODIFY:
return 0;
-
+
case MEM:
- /* cheap - is encoded in addressing mode info!
+ /* cheap - is encoded in addressing mode info!
-- except for @(R0), which has to be @0(R0) !!! */
if (GET_CODE (XEXP (addr, 0)) == REG)
return 0;
-
+
op=addr;
goto indirection;
-
+
case CONST_INT:
- case LABEL_REF:
+ case LABEL_REF:
case CONST:
case SYMBOL_REF:
/* @#address - extra cost */
@@ -1452,7 +1452,7 @@ no_side_effect_operand(rtx op, machine_mode mode ATTRIBUTE_UNUSED)
default:
break;
}
-
+
return FALSE;
}
@@ -1514,7 +1514,7 @@ pdp11_can_change_mode_class (machine_mode from,
So we disallow all mode changes involving FPRs. */
if (FLOAT_MODE_P (from) != FLOAT_MODE_P (to))
return false;
-
+
return !reg_classes_intersect_p (FPU_REGS, rclass);
}
@@ -1530,7 +1530,7 @@ pdp11_guard_type (void)
Given an rtx X being reloaded into a reg required to be
in class CLASS, return the class of reg to actually use.
In general this is just CLASS; but on some machines
- in some cases it is preferable to use a more restrictive class.
+ in some cases it is preferable to use a more restrictive class.
loading is easier into LOAD_FPU_REGS than FPU_REGS! */
@@ -1554,7 +1554,7 @@ pdp11_preferred_reload_class (rtx x, reg_class_t rclass)
Given an rtx X being reloaded into a reg required to be
in class CLASS, return the class of reg to actually use.
In general this is just CLASS; but on some machines
- in some cases it is preferable to use a more restrictive class.
+ in some cases it is preferable to use a more restrictive class.
loading is easier into LOAD_FPU_REGS than FPU_REGS! */
@@ -1576,10 +1576,10 @@ pdp11_preferred_output_reload_class (rtx x, reg_class_t rclass)
/* TARGET_SECONDARY_RELOAD.
- FPU registers AC4 and AC5 (class NO_LOAD_FPU_REGS) require an
+ FPU registers AC4 and AC5 (class NO_LOAD_FPU_REGS) require an
intermediate register (AC0-AC3: LOAD_FPU_REGS). Everything else
can be loaded/stored directly. */
-static reg_class_t
+static reg_class_t
pdp11_secondary_reload (bool in_p ATTRIBUTE_UNUSED,
rtx x,
reg_class_t reload_class,
@@ -1589,7 +1589,7 @@ pdp11_secondary_reload (bool in_p ATTRIBUTE_UNUSED,
if (reload_class != NO_LOAD_FPU_REGS || GET_CODE (x) != REG ||
REGNO_REG_CLASS (REGNO (x)) == LOAD_FPU_REGS)
return NO_REGS;
-
+
return LOAD_FPU_REGS;
}
@@ -1600,11 +1600,11 @@ pdp11_secondary_reload (bool in_p ATTRIBUTE_UNUSED,
static bool
pdp11_secondary_memory_needed (machine_mode, reg_class_t c1, reg_class_t c2)
{
- int fromfloat = (c1 == LOAD_FPU_REGS || c1 == NO_LOAD_FPU_REGS ||
+ int fromfloat = (c1 == LOAD_FPU_REGS || c1 == NO_LOAD_FPU_REGS ||
c1 == FPU_REGS);
- int tofloat = (c2 == LOAD_FPU_REGS || c2 == NO_LOAD_FPU_REGS ||
+ int tofloat = (c2 == LOAD_FPU_REGS || c2 == NO_LOAD_FPU_REGS ||
c2 == FPU_REGS);
-
+
return (fromfloat != tofloat);
}
@@ -1624,13 +1624,13 @@ pdp11_legitimate_address_p (machine_mode mode, rtx operand, bool strict,
/* accept @#address */
if (CONSTANT_ADDRESS_P (operand))
return true;
-
+
switch (GET_CODE (operand))
{
case REG:
/* accept (R0) */
return !strict || REGNO_OK_FOR_BASE_P (REGNO (operand));
-
+
case PLUS:
/* accept X(R0) */
return GET_CODE (XEXP (operand, 0)) == REG
@@ -1672,11 +1672,11 @@ pdp11_legitimate_address_p (machine_mode mode, rtx operand, bool strict,
xfoob = XEXP (operand, 0);
/* (MEM:xx (MEM:xx ())) is not valid for SI, DI and currently
- also forbidden for float, because we have to handle this
+ also forbidden for float, because we have to handle this
in output_move_double and/or output_move_quad() - we could
- do it, but currently it's not worth it!!!
- now that DFmode cannot go into CPU register file,
- maybe I should allow float ...
+ do it, but currently it's not worth it!!!
+ now that DFmode cannot go into CPU register file,
+ maybe I should allow float ...
but then I have to handle memory-to-memory moves in movdf ?? */
if (GET_MODE_BITSIZE(mode) > 16)
return false;
@@ -1722,7 +1722,7 @@ pdp11_legitimate_address_p (machine_mode mode, rtx operand, bool strict,
reg number REGNO. */
enum reg_class
pdp11_regno_reg_class (int regno)
-{
+{
if (regno == ARG_POINTER_REGNUM)
return NOTSP_REG;
else if (regno == CC_REGNUM || regno == FCC_REGNUM)
@@ -1759,9 +1759,9 @@ pdp11_reg_save_size (void)
for (regno = AC0_REGNUM; regno <= AC5_REGNUM; regno++)
if (pdp11_saved_regno (regno))
offset += 8;
-
+
return offset;
-}
+}
/* Return the offset between two registers, one to be eliminated, and the other
its replacement, at the start of a routine. */
@@ -1791,7 +1791,7 @@ output_addr_const_pdp11 (FILE *file, rtx x)
{
char buf[256];
int i;
-
+
restart:
switch (GET_CODE (x))
{
@@ -1891,7 +1891,7 @@ pdp11_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
On the pdp11 the value is found in R0 (or ac0??? not without FPU!!!! ) */
static rtx
-pdp11_function_value (const_tree valtype,
+pdp11_function_value (const_tree valtype,
const_tree fntype_or_decl ATTRIBUTE_UNUSED,
bool outgoing ATTRIBUTE_UNUSED)
{
@@ -1935,7 +1935,7 @@ pdp11_expand_shift (rtx *operands, rtx (*shift_sc) (rtx, rtx, rtx),
{
rtx r, test;
rtx_code_label *lb;
-
+
if (CONST_INT_P (operands[2]) && pdp11_small_shift (INTVAL (operands[2])))
emit_insn ((*shift_sc) (operands[0], operands[1], operands[2]));
else if (TARGET_40_PLUS)
@@ -1988,7 +1988,7 @@ pdp11_assemble_shift (rtx *operands, machine_mode m, int code)
inops[0] = operands[0];
pdp11_expand_operands (inops, exops, 1, 2, action, either);
}
-
+
if (!small)
{
/* Loop case, generate the top of loop label. */
@@ -2154,8 +2154,8 @@ pdp11_md_asm_adjust (vec<rtx> & /*outputs*/, vec<rtx> & /*inputs*/,
/* Worker function for TARGET_TRAMPOLINE_INIT.
- trampoline - how should i do it in separate i+d ?
- have some allocate_trampoline magic???
+ trampoline - how should i do it in separate i+d ?
+ have some allocate_trampoline magic???
the following should work for shared I/D:
@@ -2252,7 +2252,7 @@ static void pdp11_output_ident (const char *ident)
if (!startswith (ident, "GCC:"))
fprintf (asm_out_file, "\t.ident\t\"%s\"\n", ident);
}
-
+
}
/* This emits a (user) label, which gets a "_" prefix except for DEC
@@ -2281,7 +2281,7 @@ pdp11_output_def (FILE *file, const char *label1, const char *label2)
assemble_name (file, label1);
putc (',', file);
assemble_name (file, label2);
- }
+ }
putc ('\n', file);
}
@@ -2314,7 +2314,7 @@ pdp11_asm_named_section (const char *name, unsigned int flags,
{
const char *rwro = (flags & SECTION_WRITE) ? "rw" : "ro";
const char *insdat = (flags & SECTION_CODE) ? "i" : "d";
-
+
gcc_assert (TARGET_DEC_ASM);
fprintf (asm_out_file, "\t.psect\t%s,con,%s,%s\n", name, insdat, rwro);
}
@@ -2333,12 +2333,12 @@ pdp11_asm_init_sections (void)
".bss");
}
}
-
+
static void
pdp11_file_start (void)
{
default_file_start ();
-
+
if (TARGET_DEC_ASM)
fprintf (asm_out_file, "\t.enabl\tlsb,reg\n\n");
}
diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h
index 6c8e045..f6997b6 100644
--- a/gcc/config/pdp11/pdp11.h
+++ b/gcc/config/pdp11/pdp11.h
@@ -69,7 +69,7 @@ along with GCC; see the file COPYING3. If not see
#define SHORT_TYPE_SIZE 16
#define INT_TYPE_SIZE (TARGET_INT16 ? 16 : 32)
#define LONG_TYPE_SIZE 32
-#define LONG_LONG_TYPE_SIZE 64
+#define LONG_LONG_TYPE_SIZE 64
/* machine types from ansi */
#define SIZE_TYPE "short unsigned int" /* definition of size_t */
@@ -93,7 +93,7 @@ along with GCC; see the file COPYING3. If not see
/* Define that floats are in VAX order, not high word first as for ints. */
#define FLOAT_WORDS_BIG_ENDIAN 0
-/* Width of a word, in units (bytes).
+/* Width of a word, in units (bytes).
UNITS OR BYTES - seems like units */
#define UNITS_PER_WORD 2
@@ -107,7 +107,7 @@ extern const struct real_format pdp11_f_format;
extern const struct real_format pdp11_d_format;
/* Maximum sized of reasonable data type -- DImode ...*/
-#define MAX_FIXED_MODE_SIZE 64
+#define MAX_FIXED_MODE_SIZE 64
/* Allocation boundary (in *bits*) for storing pointers in memory. */
#define POINTER_BOUNDARY 16
@@ -144,7 +144,7 @@ extern const struct real_format pdp11_d_format;
All registers that the compiler knows about must be given numbers,
even those that are not normally considered general registers.
- we have 8 integer registers, plus 6 float
+ we have 8 integer registers, plus 6 float
(don't use scratch float !) */
/* 1 for registers that have pervasive standard uses
@@ -153,7 +153,7 @@ extern const struct real_format pdp11_d_format;
On the pdp, these are:
Reg 7 = pc;
reg 6 = sp;
- reg 5 = fp; not necessarily!
+ reg 5 = fp; not necessarily!
*/
#define FIXED_REGISTERS \
@@ -203,7 +203,7 @@ extern const struct real_format pdp11_d_format;
For any two classes, it is very desirable that there be another
class that represents their union. */
-
+
/* The pdp has a couple of classes:
MUL_REGS are used for odd numbered regs, to use in 16-bit multiplication
@@ -211,7 +211,7 @@ MUL_REGS are used for odd numbered regs, to use in 16-bit multiplication
GENERAL_REGS is all cpu
LOAD_FPU_REGS is the first four cpu regs, they are easier to load
NO_LOAD_FPU_REGS is ac4 and ac5, currently - difficult to load them
-FPU_REGS is all fpu regs
+FPU_REGS is all fpu regs
CC_REGS is the condition codes (CPU and FPU)
*/
@@ -292,7 +292,7 @@ enum reg_class
/* Return TRUE if the class is a CPU register. */
#define CPU_REG_CLASS(CLASS) \
(CLASS >= NOTR0_REG && CLASS <= GENERAL_REGS)
-
+
/* Return the maximum number of consecutive registers
needed to represent mode MODE in a register of class CLASS. */
#define CLASS_MAX_NREGS(CLASS, MODE) \
@@ -316,7 +316,7 @@ enum reg_class
#define PUSH_ROUNDING(BYTES) pdp11_push_rounding (BYTES)
-/* current_first_parm_offset stores the # of registers pushed on the
+/* current_first_parm_offset stores the # of registers pushed on the
stack */
extern int current_first_parm_offset;
@@ -328,7 +328,7 @@ extern int current_first_parm_offset;
If the precise function being called is known, FUNC is its FUNCTION_DECL;
otherwise, FUNC is 0. */
#define BASE_RETURN_VALUE_REG(MODE) \
- (FLOAT_MODE_P (MODE) ? AC0_REGNUM : RETVAL_REGNUM)
+ (FLOAT_MODE_P (MODE) ? AC0_REGNUM : RETVAL_REGNUM)
/* 1 if N is a possible register number for function argument passing.
- not used on pdp */
@@ -462,7 +462,7 @@ extern int current_first_parm_offset;
#define DEFAULT_SIGNED_CHAR 1
/* Max number of bytes we can move from memory to memory
- in one reasonably fast instruction.
+ in one reasonably fast instruction.
*/
#define MOVE_MAX 2
diff --git a/gcc/config/riscv/autovec.md b/gcc/config/riscv/autovec.md
index 4decaed..1f1849d 100644
--- a/gcc/config/riscv/autovec.md
+++ b/gcc/config/riscv/autovec.md
@@ -282,7 +282,7 @@
(match_operand:<VM> 2 "vector_mask_operand")
(match_operand 3 "autovec_length_operand")
(match_operand 4 "const_0_operand")]
- "TARGET_VECTOR"
+ "TARGET_VECTOR_AUTOVEC_SEGMENT"
{
riscv_vector::expand_lanes_load_store (operands, true);
DONE;
@@ -295,7 +295,7 @@
(match_operand:<VM> 2 "vector_mask_operand")
(match_operand 3 "autovec_length_operand")
(match_operand 4 "const_0_operand")]
- "TARGET_VECTOR"
+ "TARGET_VECTOR_AUTOVEC_SEGMENT"
{
riscv_vector::expand_lanes_load_store (operands, false);
DONE;
@@ -1454,6 +1454,69 @@
})
;; -------------------------------------------------------------------------
+;; ---- [INT,FP] Extract a vector from a vector.
+;; -------------------------------------------------------------------------
+;; TODO: This can be extended to allow basically any extract mode.
+;; For now this helps optimize VLS subregs like (subreg:V2DI (reg:V4DI) 16)
+;; that would otherwise need to go via memory.
+
+(define_expand "vec_extract<mode><vls_half>"
+ [(set (match_operand:<VLS_HALF> 0 "nonimmediate_operand")
+ (vec_select:<VLS_HALF>
+ (match_operand:VLS_HAS_HALF 1 "register_operand")
+ (parallel
+ [(match_operand 2 "immediate_operand")])))]
+ "TARGET_VECTOR"
+{
+ int sz = GET_MODE_NUNITS (<VLS_HALF>mode).to_constant ();
+ int part = INTVAL (operands[2]);
+
+ rtx start = GEN_INT (part * sz);
+ rtx tmp = operands[1];
+
+ if (part != 0)
+ {
+ tmp = gen_reg_rtx (<MODE>mode);
+
+ rtx ops[] = {tmp, operands[1], start};
+ riscv_vector::emit_vlmax_insn
+ (code_for_pred_slide (UNSPEC_VSLIDEDOWN, <MODE>mode),
+ riscv_vector::BINARY_OP, ops);
+ }
+
+ emit_move_insn (operands[0], gen_lowpart (<VLS_HALF>mode, tmp));
+ DONE;
+})
+
+(define_expand "vec_extract<mode><vls_quarter>"
+ [(set (match_operand:<VLS_QUARTER> 0 "nonimmediate_operand")
+ (vec_select:<VLS_QUARTER>
+ (match_operand:VLS_HAS_QUARTER 1 "register_operand")
+ (parallel
+ [(match_operand 2 "immediate_operand")])))]
+ "TARGET_VECTOR"
+{
+ int sz = GET_MODE_NUNITS (<VLS_QUARTER>mode).to_constant ();
+ int part = INTVAL (operands[2]);
+
+ rtx start = GEN_INT (part * sz);
+ rtx tmp = operands[1];
+
+ if (part != 0)
+ {
+ tmp = gen_reg_rtx (<MODE>mode);
+
+ rtx ops[] = {tmp, operands[1], start};
+ riscv_vector::emit_vlmax_insn
+ (code_for_pred_slide (UNSPEC_VSLIDEDOWN, <MODE>mode),
+ riscv_vector::BINARY_OP, ops);
+ }
+
+ emit_move_insn (operands[0], gen_lowpart (<VLS_QUARTER>mode, tmp));
+ DONE;
+})
+
+;; -------------------------------------------------------------------------
;; ---- [FP] Binary operations
;; -------------------------------------------------------------------------
;; Includes:
@@ -2649,6 +2712,17 @@
}
)
+(define_expand "ssadd<mode>3"
+ [(match_operand:V_VLSI 0 "register_operand")
+ (match_operand:V_VLSI 1 "register_operand")
+ (match_operand:V_VLSI 2 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_ssadd (operands[0], operands[1], operands[2], <MODE>mode);
+ DONE;
+ }
+)
+
(define_expand "ussub<mode>3"
[(match_operand:V_VLSI 0 "register_operand")
(match_operand:V_VLSI 1 "register_operand")
@@ -2660,6 +2734,17 @@
}
)
+(define_expand "sssub<mode>3"
+ [(match_operand:V_VLSI 0 "register_operand")
+ (match_operand:V_VLSI 1 "register_operand")
+ (match_operand:V_VLSI 2 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_sssub (operands[0], operands[1], operands[2], <MODE>mode);
+ DONE;
+ }
+)
+
(define_expand "ustrunc<mode><v_double_trunc>2"
[(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
(match_operand:VWEXTI 1 "register_operand")]
@@ -2694,6 +2779,40 @@
}
)
+(define_expand "sstrunc<mode><v_double_trunc>2"
+ [(match_operand:<V_DOUBLE_TRUNC> 0 "register_operand")
+ (match_operand:VWEXTI 1 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_double_sstrunc (operands[0], operands[1],
+ <MODE>mode);
+ DONE;
+ }
+)
+
+(define_expand "sstrunc<mode><v_quad_trunc>2"
+ [(match_operand:<V_QUAD_TRUNC> 0 "register_operand")
+ (match_operand:VQEXTI 1 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_quad_sstrunc (operands[0], operands[1], <MODE>mode,
+ <V_DOUBLE_TRUNC>mode);
+ DONE;
+ }
+)
+
+(define_expand "sstrunc<mode><v_oct_trunc>2"
+ [(match_operand:<V_OCT_TRUNC> 0 "register_operand")
+ (match_operand:VOEXTI 1 "register_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_vec_oct_sstrunc (operands[0], operands[1], <MODE>mode,
+ <V_DOUBLE_TRUNC>mode,
+ <V_QUAD_TRUNC>mode);
+ DONE;
+ }
+)
+
;; =========================================================================
;; == Early break auto-vectorization patterns
;; =========================================================================
@@ -2770,3 +2889,32 @@
DONE;
}
)
+
+;; =========================================================================
+;; == Strided Load/Store
+;; =========================================================================
+(define_expand "mask_len_strided_load_<mode>"
+ [(match_operand:V 0 "register_operand")
+ (match_operand 1 "pmode_reg_or_0_operand")
+ (match_operand 2 "pmode_reg_or_0_operand")
+ (match_operand:<VM> 3 "vector_mask_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_strided_load (<MODE>mode, operands);
+ DONE;
+ })
+
+(define_expand "mask_len_strided_store_<mode>"
+ [(match_operand 0 "pmode_reg_or_0_operand")
+ (match_operand 1 "pmode_reg_or_0_operand")
+ (match_operand:V 2 "register_operand")
+ (match_operand:<VM> 3 "vector_mask_operand")
+ (match_operand 4 "autovec_length_operand")
+ (match_operand 5 "const_0_operand")]
+ "TARGET_VECTOR"
+ {
+ riscv_vector::expand_strided_store (<MODE>mode, operands);
+ DONE;
+ })
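The new ssadd/sssub and sstrunc expanders give the vectorizer a direct target for signed saturating arithmetic and narrowing. As a rough illustration of the kind of scalar code that can now map onto them (whether a particular clamp idiom is recognised depends on the middle-end pattern matchers, so treat this as a sketch rather than a guaranteed outcome):

    #include <stdint.h>

    /* Saturating byte addition: the clamp corresponds to what the new
       ssadd expander implements with a single vsadd.vv per vector.  */
    void
    sat_add_i8 (int8_t *restrict d, const int8_t *a, const int8_t *b, int n)
    {
      for (int i = 0; i < n; i++)
        {
          int16_t t = (int16_t) a[i] + b[i];
          d[i] = t > INT8_MAX ? INT8_MAX : (t < INT8_MIN ? INT8_MIN : t);
        }
    }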
diff --git a/gcc/config/riscv/constraints.md b/gcc/config/riscv/constraints.md
index 3ab6d54..eb5a0bb 100644
--- a/gcc/config/riscv/constraints.md
+++ b/gcc/config/riscv/constraints.md
@@ -70,6 +70,11 @@
(and (match_code "const_int")
(match_test "ival == 8")))
+(define_constraint "P"
+ "A 5-bit signed immediate for vmv.v.i."
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (ival, -16, 15)")))
+
(define_constraint "K"
"A 5-bit unsigned immediate for CSR access instructions."
(and (match_code "const_int")
diff --git a/gcc/config/riscv/iterators.md b/gcc/config/riscv/iterators.md
index 2844cb0..0816594 100644
--- a/gcc/config/riscv/iterators.md
+++ b/gcc/config/riscv/iterators.md
@@ -233,6 +233,10 @@
(define_code_iterator any_ge [ge geu])
(define_code_iterator any_lt [lt ltu])
(define_code_iterator any_le [le leu])
+(define_code_iterator any_eq [eq ne])
+
+;; Iterators for conditions we can emit a sCC against 0 or a reg directly
+(define_code_iterator scc_0 [eq ne gt gtu])
; atomics code iterator
(define_code_iterator any_atomic [plus ior xor and])
@@ -283,6 +287,8 @@
(le "le")
(gt "gt")
(lt "lt")
+ (eq "eq")
+ (ne "ne")
(ior "ior")
(xor "xor")
(and "and")
diff --git a/gcc/config/riscv/predicates.md b/gcc/config/riscv/predicates.md
index 9971fab..55bcfa4 100644
--- a/gcc/config/riscv/predicates.md
+++ b/gcc/config/riscv/predicates.md
@@ -329,7 +329,7 @@
{
enum riscv_symbol_type type;
return (riscv_symbolic_constant_p (op, &type)
- && type == SYMBOL_GOT_DISP && !SYMBOL_REF_WEAK (op) && TARGET_PLT);
+ && type == SYMBOL_GOT_DISP && !SYMBOL_REF_WEAK (op) && flag_plt);
})
(define_predicate "call_insn_operand"
diff --git a/gcc/config/riscv/riscv-avlprop.cc b/gcc/config/riscv/riscv-avlprop.cc
index 91d80aa..066377c 100644
--- a/gcc/config/riscv/riscv-avlprop.cc
+++ b/gcc/config/riscv/riscv-avlprop.cc
@@ -65,6 +65,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
+#define INCLUDE_MEMORY
#define INCLUDE_ARRAY
#include "config.h"
diff --git a/gcc/config/riscv/riscv-c.cc b/gcc/config/riscv/riscv-c.cc
index 71112d9..c59f408 100644
--- a/gcc/config/riscv/riscv-c.cc
+++ b/gcc/config/riscv/riscv-c.cc
@@ -59,7 +59,12 @@ riscv_pragma_intrinsic_flags_pollute (struct pragma_intrinsic_flags *flags)
riscv_zvl_flags = riscv_zvl_flags
| MASK_ZVL32B
| MASK_ZVL64B
- | MASK_ZVL128B;
+ | MASK_ZVL128B
+ | MASK_ZVL256B
+ | MASK_ZVL512B
+ | MASK_ZVL1024B
+ | MASK_ZVL2048B
+ | MASK_ZVL4096B;
riscv_vector_elen_flags = riscv_vector_elen_flags
| MASK_VECTOR_ELEN_32
diff --git a/gcc/config/riscv/riscv-opts.h b/gcc/config/riscv/riscv-opts.h
index 5497d11..1b2a4bc 100644
--- a/gcc/config/riscv/riscv-opts.h
+++ b/gcc/config/riscv/riscv-opts.h
@@ -157,4 +157,9 @@ enum riscv_tls_type {
TLS_DESCRIPTORS
};
+/* On some microarchitectures, vector segment loads and stores are excessively
+ expensive, so predicate the generation of those instructions. */
+#define TARGET_VECTOR_AUTOVEC_SEGMENT \
+ (TARGET_VECTOR && riscv_mautovec_segment)
+
#endif /* ! GCC_RISCV_OPTS_H */
diff --git a/gcc/config/riscv/riscv-protos.h b/gcc/config/riscv/riscv-protos.h
index 926899c..4ed0432 100644
--- a/gcc/config/riscv/riscv-protos.h
+++ b/gcc/config/riscv/riscv-protos.h
@@ -134,8 +134,11 @@ extern bool
riscv_zcmp_valid_stack_adj_bytes_p (HOST_WIDE_INT, int);
extern void riscv_legitimize_poly_move (machine_mode, rtx, rtx, rtx);
extern void riscv_expand_usadd (rtx, rtx, rtx);
+extern void riscv_expand_ssadd (rtx, rtx, rtx);
extern void riscv_expand_ussub (rtx, rtx, rtx);
+extern void riscv_expand_sssub (rtx, rtx, rtx);
extern void riscv_expand_ustrunc (rtx, rtx);
+extern void riscv_expand_sstrunc (rtx, rtx);
#ifdef RTX_CODE
extern void riscv_expand_int_scc (rtx, enum rtx_code, rtx, rtx, bool *invert_ptr = 0);
@@ -621,6 +624,7 @@ enum mask_policy
enum tail_policy get_prefer_tail_policy ();
enum mask_policy get_prefer_mask_policy ();
rtx get_avl_type_rtx (enum avl_type);
+opt_machine_mode get_lmul_mode (scalar_mode, int);
opt_machine_mode get_vector_mode (scalar_mode, poly_uint64);
opt_machine_mode get_tuple_mode (machine_mode, unsigned int);
bool simm5_p (rtx);
@@ -644,11 +648,17 @@ void expand_vec_lround (rtx, rtx, machine_mode, machine_mode, machine_mode);
void expand_vec_lceil (rtx, rtx, machine_mode, machine_mode);
void expand_vec_lfloor (rtx, rtx, machine_mode, machine_mode);
void expand_vec_usadd (rtx, rtx, rtx, machine_mode);
+void expand_vec_ssadd (rtx, rtx, rtx, machine_mode);
void expand_vec_ussub (rtx, rtx, rtx, machine_mode);
+void expand_vec_sssub (rtx, rtx, rtx, machine_mode);
void expand_vec_double_ustrunc (rtx, rtx, machine_mode);
+void expand_vec_double_sstrunc (rtx, rtx, machine_mode);
void expand_vec_quad_ustrunc (rtx, rtx, machine_mode, machine_mode);
+void expand_vec_quad_sstrunc (rtx, rtx, machine_mode, machine_mode);
void expand_vec_oct_ustrunc (rtx, rtx, machine_mode, machine_mode,
machine_mode);
+void expand_vec_oct_sstrunc (rtx, rtx, machine_mode, machine_mode,
+ machine_mode);
#endif
bool sew64_scalar_helper (rtx *, rtx *, rtx, machine_mode,
bool, void (*)(rtx *, rtx), enum avl_type);
@@ -668,7 +678,7 @@ bool slide1_sew64_helper (int, machine_mode, machine_mode,
machine_mode, rtx *);
rtx gen_avl_for_scalar_move (rtx);
void expand_tuple_move (rtx *);
-bool expand_block_move (rtx, rtx, rtx);
+bool expand_block_move (rtx, rtx, rtx, bool);
machine_mode preferred_simd_mode (scalar_mode);
machine_mode get_mask_mode (machine_mode);
void expand_vec_series (rtx, rtx, rtx, rtx = 0);
@@ -690,6 +700,8 @@ bool expand_strcmp (rtx, rtx, rtx, rtx, unsigned HOST_WIDE_INT, bool);
void emit_vec_extract (rtx, rtx, rtx);
bool expand_vec_setmem (rtx, rtx, rtx);
bool expand_vec_cmpmem (rtx, rtx, rtx, rtx);
+void expand_strided_load (machine_mode, rtx *);
+void expand_strided_store (machine_mode, rtx *);
/* Rounding mode bitfield for fixed point VXRM. */
enum fixed_point_rounding_mode
@@ -793,6 +805,8 @@ extern bool riscv_use_divmod_expander (void);
void riscv_init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
extern bool
riscv_option_valid_attribute_p (tree, tree, tree, int);
+extern bool
+riscv_process_target_attr (const char *, location_t);
extern void
riscv_override_options_internal (struct gcc_options *);
extern void riscv_option_override (void);
diff --git a/gcc/config/riscv/riscv-string.cc b/gcc/config/riscv/riscv-string.cc
index 4bb8bce..20395e1 100644
--- a/gcc/config/riscv/riscv-string.cc
+++ b/gcc/config/riscv/riscv-string.cc
@@ -966,7 +966,7 @@ riscv_expand_block_move_scalar (rtx dest, rtx src, rtx length)
/* This function delegates block-move expansion to either the vector
implementation or the scalar one. Return TRUE if successful or FALSE
- otherwise. */
+ otherwise. Assume that the memory regions do not overlap. */
bool
riscv_expand_block_move (rtx dest, rtx src, rtx length)
@@ -974,7 +974,7 @@ riscv_expand_block_move (rtx dest, rtx src, rtx length)
if ((TARGET_VECTOR && !TARGET_XTHEADVECTOR)
&& stringop_strategy & STRATEGY_VECTOR)
{
- bool ok = riscv_vector::expand_block_move (dest, src, length);
+ bool ok = riscv_vector::expand_block_move (dest, src, length, false);
if (ok)
return true;
}
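riscv_expand_block_move itself still passes false for the new movmem_p argument, since cpymem guarantees that the regions do not overlap. A hypothetical caller for the overlapping-move case (illustrative only; the corresponding movmem expander is not part of the hunks shown here) might look like:

    /* Sketch: with movmem_p true, only non-looping copies are inlined,
       because a single vector load/store pair reads every byte before
       writing any of them, so overlap cannot be observed.  */
    if (riscv_vector::expand_block_move (dest, src, length,
                                         /*movmem_p=*/true))
      DONE;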
@@ -1051,57 +1051,54 @@ riscv_expand_block_clear (rtx dest, rtx length)
namespace riscv_vector {
-/* Used by cpymemsi in riscv.md . */
+struct stringop_info {
+ rtx avl;
+ bool need_loop;
+ machine_mode vmode;
+};
-bool
-expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
-{
- /*
- memcpy:
- mv a3, a0 # Copy destination
- loop:
- vsetvli t0, a2, e8, m8, ta, ma # Vectors of 8b
- vle8.v v0, (a1) # Load bytes
- add a1, a1, t0 # Bump pointer
- sub a2, a2, t0 # Decrement count
- vse8.v v0, (a3) # Store bytes
- add a3, a3, t0 # Bump pointer
- bnez a2, loop # Any more?
- ret # Return
- */
- gcc_assert (TARGET_VECTOR);
+/* If a vectorized stringop should be used, populate INFO and return true.
+ Otherwise return false and leave INFO unchanged.
- HOST_WIDE_INT potential_ew
- = (MIN (MIN (MEM_ALIGN (src_in), MEM_ALIGN (dst_in)), BITS_PER_WORD)
- / BITS_PER_UNIT);
- machine_mode vmode = VOIDmode;
+ MAX_EW is the maximum element width that the caller wants to use and
+ LENGTH_IN is the length of the stringop in bytes.
+
+ This is currently used for cpymem and setmem. If expand_vec_cmpmem switches
+ to using it too then check_vectorise_memory_operation can be removed.
+*/
+
+static bool
+use_vector_stringop_p (struct stringop_info &info, HOST_WIDE_INT max_ew,
+ rtx length_in)
+{
bool need_loop = true;
- bool size_p = optimize_function_for_size_p (cfun);
- rtx src, dst;
- rtx end = gen_reg_rtx (Pmode);
- rtx vec;
- rtx length_rtx = length_in;
+ machine_mode vmode = VOIDmode;
+ /* The number of elements in the stringop. */
+ rtx avl = length_in;
+ HOST_WIDE_INT potential_ew = max_ew;
+
+ if (!TARGET_VECTOR || !(stringop_strategy & STRATEGY_VECTOR))
+ return false;
if (CONST_INT_P (length_in))
{
HOST_WIDE_INT length = INTVAL (length_in);
- /* By using LMUL=8, we can copy as many bytes in one go as there
- are bits in a vector register. If the entire block thus fits,
- we don't need a loop. */
- if (length <= TARGET_MIN_VLEN)
- {
- need_loop = false;
+ /* If the VLEN and preferred LMUL allow the entire block to be copied in
+ one go then no loop is needed. */
+ if (known_le (length, BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL))
+ {
+ need_loop = false;
- /* If a single scalar load / store pair can do the job, leave it
- to the scalar code to do that. */
- /* ??? If fast unaligned access is supported, the scalar code could
- use suitably sized scalars irrespective of alignment. If that
- gets fixed, we have to adjust the test here. */
+ /* If a single scalar load / store pair can do the job, leave it
+ to the scalar code to do that. */
+ /* ??? If fast unaligned access is supported, the scalar code could
+ use suitably sized scalars irrespective of alignment. If that
+ gets fixed, we have to adjust the test here. */
- if (pow2p_hwi (length) && length <= potential_ew)
- return false;
- }
+ if (pow2p_hwi (length) && length <= potential_ew)
+ return false;
+ }
/* Find the vector mode to use. Using the largest possible element
size is likely to give smaller constants, and thus potentially
@@ -1120,14 +1117,17 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
{
scalar_int_mode elem_mode;
unsigned HOST_WIDE_INT bits = potential_ew * BITS_PER_UNIT;
- unsigned HOST_WIDE_INT per_iter;
- HOST_WIDE_INT nunits;
+ poly_uint64 per_iter;
+ poly_int64 nunits;
if (need_loop)
- per_iter = TARGET_MIN_VLEN;
+ per_iter = BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL;
else
per_iter = length;
- nunits = per_iter / potential_ew;
+ /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL may not be divisible by
+ this potential_ew. */
+ if (!multiple_p (per_iter, potential_ew, &nunits))
+ continue;
/* Unless we get an implementation that's slow for small element
size / non-word-aligned accesses, we assume that the hardware
@@ -1138,6 +1138,8 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
if (length % potential_ew != 0
|| !int_mode_for_size (bits, 0).exists (&elem_mode))
continue;
+
+ poly_uint64 mode_units;
/* Find the mode to use for the copy inside the loop - or the
sole copy, if there is no loop. */
if (!need_loop)
@@ -1153,26 +1155,25 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
pointless.
Still, by choosing a lower LMUL factor that still allows
an entire transfer, we can reduce register pressure. */
- for (unsigned lmul = 1; lmul <= 4; lmul <<= 1)
- if (TARGET_MIN_VLEN * lmul <= nunits * BITS_PER_UNIT
- /* Avoid loosing the option of using vsetivli . */
- && (nunits <= 31 * lmul || nunits > 31 * 8)
- && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew)
+ for (unsigned lmul = 1; lmul < TARGET_MAX_LMUL; lmul <<= 1)
+ if (known_le (length * BITS_PER_UNIT, TARGET_MIN_VLEN * lmul)
+ && multiple_p (BYTES_PER_RISCV_VECTOR * lmul, potential_ew,
+ &mode_units)
&& (riscv_vector::get_vector_mode
- (elem_mode, exact_div (BYTES_PER_RISCV_VECTOR * lmul,
- potential_ew)).exists (&vmode)))
+ (elem_mode, mode_units).exists (&vmode)))
break;
}
- /* The RVVM8?I modes are notionally 8 * BYTES_PER_RISCV_VECTOR bytes
- wide. BYTES_PER_RISCV_VECTOR can't be evenly divided by
- the sizes of larger element types; the LMUL factor of 8 can at
- the moment be divided by the SEW, with SEW of up to 8 bytes,
- but there are reserved encodings so there might be larger
- SEW in the future. */
- if (riscv_vector::get_vector_mode
- (elem_mode, exact_div (BYTES_PER_RISCV_VECTOR * 8,
- potential_ew)).exists (&vmode))
+ /* Stop searching if a suitable vmode has been found. */
+ if (vmode != VOIDmode)
+ break;
+
+ /* BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL will at least be divisible
+ by potential_ew 1, so this should succeed eventually. */
+ if (multiple_p (BYTES_PER_RISCV_VECTOR * TARGET_MAX_LMUL,
+ potential_ew, &mode_units)
+ && riscv_vector::get_vector_mode (elem_mode,
+ mode_units).exists (&vmode))
break;
/* We may get here if we tried an element size that's larger than
@@ -1181,45 +1182,90 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
gcc_assert (potential_ew > 1);
}
if (potential_ew > 1)
- length_rtx = GEN_INT (length / potential_ew);
+ avl = GEN_INT (length / potential_ew);
}
else
{
- vmode = E_RVVM8QImode;
+ gcc_assert (get_lmul_mode (QImode, TARGET_MAX_LMUL).exists (&vmode));
}
/* A memcpy libcall in the worst case takes 3 instructions to prepare the
arguments + 1 for the call. The RVV sequence takes about 7 instructions,
so when optimizing for size a libcall may be preferable. */
- if (size_p && need_loop)
+ if (optimize_function_for_size_p (cfun) && need_loop)
+ return false;
+
+ info.need_loop = need_loop;
+ info.vmode = vmode;
+ info.avl = avl;
+ return true;
+}
+
+/* Used by cpymemsi in riscv.md. */
+
+bool
+expand_block_move (rtx dst_in, rtx src_in, rtx length_in, bool movmem_p)
+{
+ /*
+ memcpy:
+ mv a3, a0 # Copy destination
+ loop:
+ vsetvli t0, a2, e8, m8, ta, ma # Vectors of 8b
+ vle8.v v0, (a1) # Load bytes
+ add a1, a1, t0 # Bump pointer
+ sub a2, a2, t0 # Decrement count
+ vse8.v v0, (a3) # Store bytes
+ add a3, a3, t0 # Bump pointer
+ bnez a2, loop # Any more?
+ ret # Return
+ */
+ struct stringop_info info;
+
+ HOST_WIDE_INT potential_ew
+ = (MIN (MIN (MEM_ALIGN (src_in), MEM_ALIGN (dst_in)), BITS_PER_WORD)
+ / BITS_PER_UNIT);
+
+ if (!use_vector_stringop_p (info, potential_ew, length_in))
return false;
- /* length_rtx holds the (remaining) length of the required copy.
+ /* Inlining a general memmove is a pessimisation: we can't avoid having to
+ decide which direction to go at runtime, which is costly in instruction
+ count. However, when the entire move fits in one vector operation we can
+ do all reads before doing any writes, so there is nothing to worry about;
+ generate the inline vector code in such situations. */
+ if (info.need_loop && movmem_p)
+ return false;
+
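To make the movmem_p restriction above concrete: an overlapping move is only inlined when it fits in a single vector load/store pair, since then all reads happen before any writes. A minimal sketch (assuming the target's vector length covers 16 bytes; otherwise the looping case would fall back to a libcall):

#include <string.h>

/* Fits in one vle8.v/vse8.v pair, so no runtime direction check is
   needed; a larger (looping) memmove stays a libcall.  */
void
shift_left_by_one (char *buf)
{
  memmove (buf, buf + 1, 16);
}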
+ rtx src, dst;
+ rtx vec;
+
+ /* avl holds the (remaining) length of the required copy.
cnt holds the length we copy with the current load/store pair. */
- rtx cnt = length_rtx;
+ rtx cnt = info.avl;
rtx label = NULL_RTX;
rtx dst_addr = copy_addr_to_reg (XEXP (dst_in, 0));
rtx src_addr = copy_addr_to_reg (XEXP (src_in, 0));
- if (need_loop)
+ if (info.need_loop)
{
- length_rtx = copy_to_mode_reg (Pmode, length_rtx);
+ info.avl = copy_to_mode_reg (Pmode, info.avl);
cnt = gen_reg_rtx (Pmode);
label = gen_label_rtx ();
emit_label (label);
- emit_insn (riscv_vector::gen_no_side_effects_vsetvl_rtx (vmode, cnt,
- length_rtx));
+ emit_insn (riscv_vector::gen_no_side_effects_vsetvl_rtx (info.vmode, cnt,
+ info.avl));
}
- vec = gen_reg_rtx (vmode);
- src = change_address (src_in, vmode, src_addr);
- dst = change_address (dst_in, vmode, dst_addr);
+ vec = gen_reg_rtx (info.vmode);
+ src = change_address (src_in, info.vmode, src_addr);
+ dst = change_address (dst_in, info.vmode, dst_addr);
/* If we don't need a loop and have a suitable mode to describe the size,
just do a load / store pair and leave it up to the later lazy code
motion pass to insert the appropriate vsetvli. */
- if (!need_loop && known_eq (GET_MODE_SIZE (vmode), INTVAL (length_in)))
+ if (!info.need_loop
+ && known_eq (GET_MODE_SIZE (info.vmode), INTVAL (length_in)))
{
emit_move_insn (vec, src);
emit_move_insn (dst, vec);
@@ -1227,26 +1273,26 @@ expand_block_move (rtx dst_in, rtx src_in, rtx length_in)
else
{
machine_mode mask_mode = riscv_vector::get_vector_mode
- (BImode, GET_MODE_NUNITS (vmode)).require ();
+ (BImode, GET_MODE_NUNITS (info.vmode)).require ();
rtx mask = CONSTM1_RTX (mask_mode);
if (!satisfies_constraint_K (cnt))
cnt= force_reg (Pmode, cnt);
rtx m_ops[] = {vec, mask, src};
- emit_nonvlmax_insn (code_for_pred_mov (vmode),
+ emit_nonvlmax_insn (code_for_pred_mov (info.vmode),
riscv_vector::UNARY_OP_TAMA, m_ops, cnt);
- emit_insn (gen_pred_store (vmode, dst, mask, vec, cnt,
+ emit_insn (gen_pred_store (info.vmode, dst, mask, vec, cnt,
get_avl_type_rtx (riscv_vector::NONVLMAX)));
}
- if (need_loop)
+ if (info.need_loop)
{
emit_insn (gen_rtx_SET (src_addr, gen_rtx_PLUS (Pmode, src_addr, cnt)));
emit_insn (gen_rtx_SET (dst_addr, gen_rtx_PLUS (Pmode, dst_addr, cnt)));
- emit_insn (gen_rtx_SET (length_rtx, gen_rtx_MINUS (Pmode, length_rtx, cnt)));
+ emit_insn (gen_rtx_SET (info.avl, gen_rtx_MINUS (Pmode, info.avl, cnt)));
/* Emit the loop condition. */
- rtx test = gen_rtx_NE (VOIDmode, end, const0_rtx);
- emit_jump_insn (gen_cbranch4 (Pmode, test, length_rtx, const0_rtx, label));
+ rtx test = gen_rtx_NE (VOIDmode, info.avl, const0_rtx);
+ emit_jump_insn (gen_cbranch4 (Pmode, test, info.avl, const0_rtx, label));
emit_insn (gen_nop ());
}
@@ -1557,41 +1603,39 @@ check_vectorise_memory_operation (rtx length_in, HOST_WIDE_INT &lmul_out)
bool
expand_vec_setmem (rtx dst_in, rtx length_in, rtx fill_value_in)
{
- HOST_WIDE_INT lmul;
+ stringop_info info;
+
/* Check we are able and allowed to vectorise this operation;
bail if not. */
- if (!check_vectorise_memory_operation (length_in, lmul))
+ if (!use_vector_stringop_p (info, 1, length_in) || info.need_loop)
return false;
- machine_mode vmode
- = riscv_vector::get_vector_mode (QImode, BYTES_PER_RISCV_VECTOR * lmul)
- .require ();
rtx dst_addr = copy_addr_to_reg (XEXP (dst_in, 0));
- rtx dst = change_address (dst_in, vmode, dst_addr);
+ rtx dst = change_address (dst_in, info.vmode, dst_addr);
- rtx fill_value = gen_reg_rtx (vmode);
+ rtx fill_value = gen_reg_rtx (info.vmode);
rtx broadcast_ops[] = { fill_value, fill_value_in };
/* If the length is exactly vlmax for the selected mode, do that.
Otherwise, use a predicated store. */
- if (known_eq (GET_MODE_SIZE (vmode), INTVAL (length_in)))
+ if (known_eq (GET_MODE_SIZE (info.vmode), INTVAL (info.avl)))
{
- emit_vlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP,
- broadcast_ops);
+ emit_vlmax_insn (code_for_pred_broadcast (info.vmode), UNARY_OP,
+ broadcast_ops);
emit_move_insn (dst, fill_value);
}
else
{
- if (!satisfies_constraint_K (length_in))
- length_in = force_reg (Pmode, length_in);
- emit_nonvlmax_insn (code_for_pred_broadcast (vmode), UNARY_OP,
- broadcast_ops, length_in);
+ if (!satisfies_constraint_K (info.avl))
+ info.avl = force_reg (Pmode, info.avl);
+ emit_nonvlmax_insn (code_for_pred_broadcast (info.vmode),
+ riscv_vector::UNARY_OP, broadcast_ops, info.avl);
machine_mode mask_mode
- = riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (vmode))
- .require ();
+ = riscv_vector::get_vector_mode (BImode, GET_MODE_NUNITS (info.vmode))
+ .require ();
rtx mask = CONSTM1_RTX (mask_mode);
- emit_insn (gen_pred_store (vmode, dst, mask, fill_value, length_in,
- get_avl_type_rtx (riscv_vector::NONVLMAX)));
+ emit_insn (gen_pred_store (info.vmode, dst, mask, fill_value, info.avl,
+ get_avl_type_rtx (riscv_vector::NONVLMAX)));
}
return true;
diff --git a/gcc/config/riscv/riscv-subset.h b/gcc/config/riscv/riscv-subset.h
index dace4de..1914a53 100644
--- a/gcc/config/riscv/riscv-subset.h
+++ b/gcc/config/riscv/riscv-subset.h
@@ -120,5 +120,8 @@ public:
extern const riscv_subset_list *riscv_cmdline_subset_list (void);
extern void
riscv_set_arch_by_subset_list (riscv_subset_list *, struct gcc_options *);
+extern bool
+riscv_ext_is_subset (struct cl_target_option *, struct cl_target_option *);
+extern int riscv_x_target_flags_isa_mask (void);
#endif /* ! GCC_RISCV_SUBSET_H */
diff --git a/gcc/config/riscv/riscv-target-attr.cc b/gcc/config/riscv/riscv-target-attr.cc
index bf14ade..8ce9607 100644
--- a/gcc/config/riscv/riscv-target-attr.cc
+++ b/gcc/config/riscv/riscv-target-attr.cc
@@ -304,35 +304,13 @@ num_occurrences_in_str (char c, char *str)
return res;
}
-/* Parse the tree in ARGS that contains the target attribute information
+/* Parse the string in ARGS that contains the target attribute information
and update the global target options space. */
-static bool
-riscv_process_target_attr (tree args, location_t loc)
+bool
+riscv_process_target_attr (const char *args, location_t loc)
{
- if (TREE_CODE (args) == TREE_LIST)
- {
- do
- {
- tree head = TREE_VALUE (args);
- if (head)
- {
- if (!riscv_process_target_attr (head, loc))
- return false;
- }
- args = TREE_CHAIN (args);
- } while (args);
-
- return true;
- }
-
- if (TREE_CODE (args) != STRING_CST)
- {
- error_at (loc, "attribute %<target%> argument not a string");
- return false;
- }
-
- size_t len = strlen (TREE_STRING_POINTER (args));
+ size_t len = strlen (args);
/* No need to emit warning or error on empty string here, generic code already
handle this case. */
@@ -343,7 +321,7 @@ riscv_process_target_attr (tree args, location_t loc)
std::unique_ptr<char[]> buf (new char[len+1]);
char *str_to_check = buf.get ();
- strcpy (str_to_check, TREE_STRING_POINTER (args));
+ strcpy (str_to_check, args);
/* Used to catch empty spaces between semi-colons i.e.
attribute ((target ("attr1;;attr2"))). */
@@ -366,7 +344,7 @@ riscv_process_target_attr (tree args, location_t loc)
if (num_attrs != num_semicolons + 1)
{
error_at (loc, "malformed %<target(\"%s\")%> attribute",
- TREE_STRING_POINTER (args));
+ args);
return false;
}
@@ -376,6 +354,37 @@ riscv_process_target_attr (tree args, location_t loc)
return true;
}
+/* Parse the tree in ARGS that contains the target attribute information
+ and update the global target options space. */
+
+static bool
+riscv_process_target_attr (tree args, location_t loc)
+{
+ if (TREE_CODE (args) == TREE_LIST)
+ {
+ do
+ {
+ tree head = TREE_VALUE (args);
+ if (head)
+ {
+ if (!riscv_process_target_attr (head, loc))
+ return false;
+ }
+ args = TREE_CHAIN (args);
+ } while (args);
+
+ return true;
+ }
+
+ if (TREE_CODE (args) != STRING_CST)
+ {
+ error_at (loc, "attribute %<target%> argument not a string");
+ return false;
+ }
+
+ return riscv_process_target_attr (TREE_STRING_POINTER (args), loc);
+}
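For context, the strings being parsed here come from the C-level target attribute; the pieces are separated by semicolons, as the semicolon counting above shows. A hypothetical example (the arch=/tune= keys follow the documented RISC-V target-attribute syntax; the particular extension and tuning are illustrative only):

/* Each semicolon-separated piece is processed separately.  */
__attribute__ ((target ("arch=+zbb;tune=rocket")))
int
count_bits (unsigned long x)
{
  return __builtin_popcountl (x);
}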
+
/* Implement TARGET_OPTION_VALID_ATTRIBUTE_P.
This is used to process attribute ((target ("..."))).
Note, that riscv_set_current_function() has not been called before,
diff --git a/gcc/config/riscv/riscv-v.cc b/gcc/config/riscv/riscv-v.cc
index c8960366..5e728f0 100644
--- a/gcc/config/riscv/riscv-v.cc
+++ b/gcc/config/riscv/riscv-v.cc
@@ -51,6 +51,7 @@
#include "targhooks.h"
#include "predict.h"
#include "errors.h"
+#include "riscv-v.h"
using namespace riscv_vector;
@@ -436,58 +437,6 @@ emit_nonvlmax_insn (unsigned icode, unsigned insn_flags, rtx *ops, rtx vl)
e.emit_insn ((enum insn_code) icode, ops);
}
-class rvv_builder : public rtx_vector_builder
-{
-public:
- rvv_builder () : rtx_vector_builder () {}
- rvv_builder (machine_mode mode, unsigned int npatterns,
- unsigned int nelts_per_pattern)
- : rtx_vector_builder (mode, npatterns, nelts_per_pattern)
- {
- m_inner_mode = GET_MODE_INNER (mode);
- m_inner_bits_size = GET_MODE_BITSIZE (m_inner_mode);
- m_inner_bytes_size = GET_MODE_SIZE (m_inner_mode);
- m_mask_mode = get_mask_mode (mode);
-
- gcc_assert (
- int_mode_for_size (inner_bits_size (), 0).exists (&m_inner_int_mode));
- m_int_mode
- = get_vector_mode (m_inner_int_mode, GET_MODE_NUNITS (mode)).require ();
- }
-
- bool can_duplicate_repeating_sequence_p ();
- bool is_repeating_sequence ();
- rtx get_merged_repeating_sequence ();
-
- bool repeating_sequence_use_merge_profitable_p ();
- bool combine_sequence_use_slideup_profitable_p ();
- bool combine_sequence_use_merge_profitable_p ();
- rtx get_merge_scalar_mask (unsigned int, machine_mode) const;
-
- bool single_step_npatterns_p () const;
- bool npatterns_all_equal_p () const;
- bool interleaved_stepped_npatterns_p () const;
- bool npatterns_vid_diff_repeated_p () const;
-
- machine_mode new_mode () const { return m_new_mode; }
- scalar_mode inner_mode () const { return m_inner_mode; }
- scalar_int_mode inner_int_mode () const { return m_inner_int_mode; }
- machine_mode mask_mode () const { return m_mask_mode; }
- machine_mode int_mode () const { return m_int_mode; }
- unsigned int inner_bits_size () const { return m_inner_bits_size; }
- unsigned int inner_bytes_size () const { return m_inner_bytes_size; }
-
-private:
- scalar_mode m_inner_mode;
- scalar_int_mode m_inner_int_mode;
- machine_mode m_new_mode;
- scalar_int_mode m_new_inner_mode;
- machine_mode m_mask_mode;
- machine_mode m_int_mode;
- unsigned int m_inner_bits_size;
- unsigned int m_inner_bytes_size;
-};
-
/* Return true if the vector duplicated by a super element which is the fusion
of consecutive elements.
@@ -845,6 +794,15 @@ const_vec_all_in_range_p (rtx vec, poly_int64 minval, poly_int64 maxval)
return true;
}
+/* Return true if the vector's elements are all duplicates of an integer
+ in the range -16 to 15 or of the floating-point value 0.0. */
+
+bool
+valid_vec_immediate_p (rtx x)
+{
+ return (satisfies_constraint_vi (x) || satisfies_constraint_Wc0 (x));
+}
+
/* Return a const vector of VAL. The VAL can be either const_int or
const_poly_int. */
@@ -1146,30 +1104,92 @@ expand_vec_series (rtx dest, rtx base, rtx step, rtx vid)
emit_move_insn (dest, result);
}
+/* Subroutine of riscv_vector_expand_vector_init.
+ Works as follows:
+ (a) Initialize TARGET by broadcasting element NELTS_REQD - 1 of BUILDER.
+ (b) Skip leading elements from BUILDER, which are the same as
+ element NELTS_REQD - 1.
+ (c) Insert earlier elements in reverse order in TARGET using vslide1down. */
+
static void
-expand_const_vector (rtx target, rtx src)
+expand_vector_init_insert_elems (rtx target, const rvv_builder &builder,
+ int nelts_reqd)
{
machine_mode mode = GET_MODE (target);
- if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ rtx dup = expand_vector_broadcast (mode, builder.elt (0));
+ emit_move_insn (target, dup);
+ int ndups = builder.count_dups (0, nelts_reqd - 1, 1);
+ for (int i = ndups; i < nelts_reqd; i++)
{
- rtx elt;
- gcc_assert (
- const_vec_duplicate_p (src, &elt)
- && (rtx_equal_p (elt, const0_rtx) || rtx_equal_p (elt, const1_rtx)));
- rtx ops[] = {target, src};
- emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops);
- return;
+ unsigned int unspec
+ = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1DOWN : UNSPEC_VSLIDE1DOWN;
+ insn_code icode = code_for_pred_slide (unspec, mode);
+ rtx ops[] = {target, target, builder.elt (i)};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
}
+}
+
+/* Subroutine of expand_vec_init to handle case
+ when all trailing elements of builder are same.
+ This works as follows:
+ (a) Use expand_insn interface to broadcast last vector element in TARGET.
+ (b) Insert remaining elements in TARGET using insr.
+
+ ??? The heuristic used is to do above if number of same trailing elements
+ is greater than leading_ndups, loosely based on
+ heuristic from mostly_zeros_p. May need fine-tuning. */
+static bool
+expand_vector_init_trailing_same_elem (rtx target,
+ const rtx_vector_builder &builder,
+ int nelts_reqd)
+{
+ int leading_ndups = builder.count_dups (0, nelts_reqd - 1, 1);
+ int trailing_ndups = builder.count_dups (nelts_reqd - 1, -1, -1);
+ machine_mode mode = GET_MODE (target);
+
+ if (trailing_ndups > leading_ndups)
+ {
+ rtx dup = expand_vector_broadcast (mode, builder.elt (nelts_reqd - 1));
+ for (int i = nelts_reqd - trailing_ndups - 1; i >= 0; i--)
+ {
+ unsigned int unspec
+ = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
+ insn_code icode = code_for_pred_slide (unspec, mode);
+ rtx tmp = gen_reg_rtx (mode);
+ rtx ops[] = {tmp, dup, builder.elt (i)};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ /* slide1up need source and dest to be different REG. */
+ dup = tmp;
+ }
+
+ emit_move_insn (target, dup);
+ return true;
+ }
+
+ return false;
+}
+
+static void
+expand_const_vector (rtx target, rtx src)
+{
+ machine_mode mode = GET_MODE (target);
+ rtx result = register_operand (target, mode) ? target : gen_reg_rtx (mode);
rtx elt;
if (const_vec_duplicate_p (src, &elt))
{
- rtx tmp = register_operand (target, mode) ? target : gen_reg_rtx (mode);
+ if (GET_MODE_CLASS (mode) == MODE_VECTOR_BOOL)
+ {
+ gcc_assert (rtx_equal_p (elt, const0_rtx)
+ || rtx_equal_p (elt, const1_rtx));
+ rtx ops[] = {result, src};
+ emit_vlmax_insn (code_for_pred_mov (mode), UNARY_MASK_OP, ops);
+ }
/* Element in range -16 ~ 15 integer or 0.0 floating-point,
we use vmv.v.i instruction. */
- if (satisfies_constraint_vi (src) || satisfies_constraint_Wc0 (src))
+ else if (valid_vec_immediate_p (src))
{
- rtx ops[] = {tmp, src};
+ rtx ops[] = {result, src};
emit_vlmax_insn (code_for_pred_mov (mode), UNARY_OP, ops);
}
else
@@ -1186,7 +1206,7 @@ expand_const_vector (rtx target, rtx src)
instruction (vsetvl a5, zero). */
if (lra_in_progress)
{
- rtx ops[] = {tmp, elt};
+ rtx ops[] = {result, elt};
emit_vlmax_insn (code_for_pred_broadcast (mode), UNARY_OP, ops);
}
else
@@ -1194,15 +1214,15 @@ expand_const_vector (rtx target, rtx src)
struct expand_operand ops[2];
enum insn_code icode = optab_handler (vec_duplicate_optab, mode);
gcc_assert (icode != CODE_FOR_nothing);
- create_output_operand (&ops[0], tmp, mode);
+ create_output_operand (&ops[0], result, mode);
create_input_operand (&ops[1], elt, GET_MODE_INNER (mode));
expand_insn (icode, 2, ops);
- tmp = ops[0].value;
+ result = ops[0].value;
}
}
- if (tmp != target)
- emit_move_insn (target, tmp);
+ if (result != target)
+ emit_move_insn (target, result);
return;
}
@@ -1210,7 +1230,10 @@ expand_const_vector (rtx target, rtx src)
rtx base, step;
if (const_vec_series_p (src, &base, &step))
{
- expand_vec_series (target, base, step);
+ expand_vec_series (result, base, step);
+
+ if (result != target)
+ emit_move_insn (target, result);
return;
}
@@ -1243,7 +1266,7 @@ expand_const_vector (rtx target, rtx src)
all element equal to 0x0706050403020100. */
rtx ele = builder.get_merged_repeating_sequence ();
rtx dup = expand_vector_broadcast (builder.new_mode (), ele);
- emit_move_insn (target, gen_lowpart (mode, dup));
+ emit_move_insn (result, gen_lowpart (mode, dup));
}
else
{
@@ -1272,8 +1295,8 @@ expand_const_vector (rtx target, rtx src)
emit_vlmax_insn (code_for_pred_scalar (AND, builder.int_mode ()),
BINARY_OP, and_ops);
- rtx tmp = gen_reg_rtx (builder.mode ());
- rtx dup_ops[] = {tmp, builder.elt (0)};
+ rtx tmp1 = gen_reg_rtx (builder.mode ());
+ rtx dup_ops[] = {tmp1, builder.elt (0)};
emit_vlmax_insn (code_for_pred_broadcast (builder.mode ()), UNARY_OP,
dup_ops);
for (unsigned int i = 1; i < builder.npatterns (); i++)
@@ -1285,12 +1308,12 @@ expand_const_vector (rtx target, rtx src)
/* Merge scalar to each i. */
rtx tmp2 = gen_reg_rtx (builder.mode ());
- rtx merge_ops[] = {tmp2, tmp, builder.elt (i), mask};
+ rtx merge_ops[] = {tmp2, tmp1, builder.elt (i), mask};
insn_code icode = code_for_pred_merge_scalar (builder.mode ());
emit_vlmax_insn (icode, MERGE_OP, merge_ops);
- tmp = tmp2;
+ tmp1 = tmp2;
}
- emit_move_insn (target, tmp);
+ emit_move_insn (result, tmp1);
}
}
else if (CONST_VECTOR_STEPPED_P (src))
@@ -1312,25 +1335,61 @@ expand_const_vector (rtx target, rtx src)
/* Generate the variable-length vector following this rule:
{ a, a, a + step, a + step, a + step * 2, a + step * 2, ...}
E.g. { 0, 0, 8, 8, 16, 16, ... } */
- /* We want to create a pattern where value[ix] = floor (ix /
+
+ /* We want to create a pattern where value[idx] = floor (idx /
NPATTERNS). As NPATTERNS is always a power of two we can
- rewrite this as = ix & -NPATTERNS. */
+ rewrite this as = idx & -NPATTERNS. */
/* Step 2: VID AND -NPATTERNS:
{ 0&-4, 1&-4, 2&-4, 3 &-4, 4 &-4, 5 &-4, 6 &-4, 7 &-4, ... }
*/
rtx imm
= gen_int_mode (-builder.npatterns (), builder.inner_mode ());
- rtx tmp = gen_reg_rtx (builder.mode ());
- rtx and_ops[] = {tmp, vid, imm};
+ rtx tmp1 = gen_reg_rtx (builder.mode ());
+ rtx and_ops[] = {tmp1, vid, imm};
icode = code_for_pred_scalar (AND, builder.mode ());
emit_vlmax_insn (icode, BINARY_OP, and_ops);
+
+ /* Step 3: Convert to step size 1. */
+ rtx tmp2 = gen_reg_rtx (builder.mode ());
+ /* Shift right by log2 (npatterns) to convert,
+ e.g., { 0, 0, 0, 0, 4, 4, ... }
+ into { 0, 0, 0, 0, 1, 1, ... }. */
+ HOST_WIDE_INT shift_amt = exact_log2 (builder.npatterns ());
+ rtx shift = gen_int_mode (shift_amt, builder.inner_mode ());
+ rtx shift_ops[] = {tmp2, tmp1, shift};
+ icode = code_for_pred_scalar (ASHIFTRT, builder.mode ());
+ emit_vlmax_insn (icode, BINARY_OP, shift_ops);
+
+ /* Step 4: Multiply to step size n. */
+ HOST_WIDE_INT step_size
+ = INTVAL (builder.elt (builder.npatterns ()))
+ - INTVAL (builder.elt (0));
+ rtx tmp3 = gen_reg_rtx (builder.mode ());
+ if (pow2p_hwi (step_size))
+ {
+ /* Power of 2 can be handled with a left shift. */
+ HOST_WIDE_INT shift = exact_log2 (step_size);
+ rtx shift_amount = gen_int_mode (shift, Pmode);
+ insn_code icode = code_for_pred_scalar (ASHIFT, mode);
+ rtx ops[] = {tmp3, tmp2, shift_amount};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ }
+ else
+ {
+ rtx mult_amt = gen_int_mode (step_size, builder.inner_mode ());
+ insn_code icode = code_for_pred_scalar (MULT, builder.mode ());
+ rtx ops[] = {tmp3, tmp2, mult_amt};
+ emit_vlmax_insn (icode, BINARY_OP, ops);
+ }
+
+ /* Step 5: Add starting value to all elements. */
HOST_WIDE_INT init_val = INTVAL (builder.elt (0));
if (init_val == 0)
- emit_move_insn (target, tmp);
+ emit_move_insn (result, tmp3);
else
{
rtx dup = gen_const_vector_dup (builder.mode (), init_val);
- rtx add_ops[] = {target, tmp, dup};
+ rtx add_ops[] = {result, tmp3, dup};
icode = code_for_pred (PLUS, builder.mode ());
emit_vlmax_insn (icode, BINARY_OP, add_ops);
}
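A worked example of steps 2-5 above, with assumed values (npatterns = 2, a = 3, step = 8), illustrative only:

/* vid                 = { 0, 1,  2,  3,  4,  5, ... }
   step 2: vid & -2    = { 0, 0,  2,  2,  4,  4, ... }
   step 3: >> log2 (2) = { 0, 0,  1,  1,  2,  2, ... }
   step 4: * 8         = { 0, 0,  8,  8, 16, 16, ... }
   step 5: + 3         = { 3, 3, 11, 11, 19, 19, ... }  */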
@@ -1360,7 +1419,7 @@ expand_const_vector (rtx target, rtx src)
/* Step 2: Generate result = VID + diff. */
rtx vec = v.build ();
- rtx add_ops[] = {target, vid, vec};
+ rtx add_ops[] = {result, vid, vec};
emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
BINARY_OP, add_ops);
}
@@ -1376,24 +1435,24 @@ expand_const_vector (rtx target, rtx src)
v.quick_push (builder.elt (i));
rtx new_base = v.build ();
- /* Step 2: Generate tmp = VID >> LOG2 (NPATTERNS). */
+ /* Step 2: Generate tmp1 = VID >> LOG2 (NPATTERNS). */
rtx shift_count
= gen_int_mode (exact_log2 (builder.npatterns ()),
builder.inner_mode ());
- rtx tmp = expand_simple_binop (builder.mode (), LSHIFTRT,
+ rtx tmp1 = expand_simple_binop (builder.mode (), LSHIFTRT,
vid, shift_count, NULL_RTX,
false, OPTAB_DIRECT);
- /* Step 3: Generate tmp2 = tmp * step. */
+ /* Step 3: Generate tmp2 = tmp1 * step. */
rtx tmp2 = gen_reg_rtx (builder.mode ());
rtx step
= simplify_binary_operation (MINUS, builder.inner_mode (),
builder.elt (v.npatterns()),
builder.elt (0));
- expand_vec_series (tmp2, const0_rtx, step, tmp);
+ expand_vec_series (tmp2, const0_rtx, step, tmp1);
- /* Step 4: Generate target = tmp2 + new_base. */
- rtx add_ops[] = {target, tmp2, new_base};
+ /* Step 4: Generate result = tmp2 + new_base. */
+ rtx add_ops[] = {result, tmp2, new_base};
emit_vlmax_insn (code_for_pred (PLUS, builder.mode ()),
BINARY_OP, add_ops);
}
@@ -1426,13 +1485,13 @@ expand_const_vector (rtx target, rtx src)
if (int_mode_for_size (new_smode_bitsize, 0).exists (&new_smode)
&& get_vector_mode (new_smode, new_nunits).exists (&new_mode))
{
- rtx tmp = gen_reg_rtx (new_mode);
+ rtx tmp1 = gen_reg_rtx (new_mode);
base1 = gen_int_mode (rtx_to_poly_int64 (base1), new_smode);
- expand_vec_series (tmp, base1, gen_int_mode (step1, new_smode));
+ expand_vec_series (tmp1, base1, gen_int_mode (step1, new_smode));
if (rtx_equal_p (base2, const0_rtx) && known_eq (step2, 0))
/* { 1, 0, 2, 0, ... }. */
- emit_move_insn (target, gen_lowpart (mode, tmp));
+ emit_move_insn (result, gen_lowpart (mode, tmp1));
else if (known_eq (step2, 0))
{
/* { 1, 1, 2, 1, ... }. */
@@ -1442,10 +1501,10 @@ expand_const_vector (rtx target, rtx src)
gen_int_mode (builder.inner_bits_size (), new_smode),
NULL_RTX, false, OPTAB_DIRECT);
rtx tmp2 = gen_reg_rtx (new_mode);
- rtx and_ops[] = {tmp2, tmp, scalar};
- emit_vlmax_insn (code_for_pred_scalar (AND, new_mode),
- BINARY_OP, and_ops);
- emit_move_insn (target, gen_lowpart (mode, tmp2));
+ rtx ior_ops[] = {tmp2, tmp1, scalar};
+ emit_vlmax_insn (code_for_pred_scalar (IOR, new_mode),
+ BINARY_OP, ior_ops);
+ emit_move_insn (result, gen_lowpart (mode, tmp2));
}
else
{
@@ -1459,10 +1518,10 @@ expand_const_vector (rtx target, rtx src)
gen_int_mode (builder.inner_bits_size (), Pmode), NULL_RTX,
false, OPTAB_DIRECT);
rtx tmp3 = gen_reg_rtx (new_mode);
- rtx ior_ops[] = {tmp3, tmp, shifted_tmp2};
+ rtx ior_ops[] = {tmp3, tmp1, shifted_tmp2};
emit_vlmax_insn (code_for_pred (IOR, new_mode), BINARY_OP,
ior_ops);
- emit_move_insn (target, gen_lowpart (mode, tmp3));
+ emit_move_insn (result, gen_lowpart (mode, tmp3));
}
}
else
@@ -1490,7 +1549,7 @@ expand_const_vector (rtx target, rtx src)
rtx mask = gen_reg_rtx (builder.mask_mode ());
expand_vec_cmp (mask, EQ, and_vid, CONST1_RTX (mode));
- rtx ops[] = {target, tmp1, tmp2, mask};
+ rtx ops[] = {result, tmp1, tmp2, mask};
emit_vlmax_insn (code_for_pred_merge (mode), MERGE_OP, ops);
}
}
@@ -1500,6 +1559,9 @@ expand_const_vector (rtx target, rtx src)
}
else
gcc_unreachable ();
+
+ if (result != target)
+ emit_move_insn (target, result);
}
/* Get the frm mode with given CONST_INT rtx, the default mode is
@@ -1828,6 +1890,18 @@ get_mask_mode (machine_mode mode)
return get_vector_mode (BImode, nunits).require ();
}
+/* Return the appropriate LMUL mode for MODE. */
+
+opt_machine_mode
+get_lmul_mode (scalar_mode mode, int lmul)
+{
+ poly_uint64 lmul_nunits;
+ unsigned int bytes = GET_MODE_SIZE (mode);
+ if (multiple_p (BYTES_PER_RISCV_VECTOR * lmul, bytes, &lmul_nunits))
+ return get_vector_mode (mode, lmul_nunits);
+ return E_VOIDmode;
+}
+
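A quick sanity check of the arithmetic above, with illustrative numbers only (assuming a minimum VLEN of 128, i.e. 16 bytes per single vector register, and the usual RVVM<lmul><elt> vector modes):

/* get_lmul_mode (QImode, 8): 16 * 8 / 1 = 128 QI units -> RVVM8QImode
   get_lmul_mode (HImode, 2): 16 * 2 / 2 =  16 HI units -> RVVM2HImode
   If the multiple_p test fails, no mode is returned (VOIDmode).  */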
/* Return the appropriate M1 mode for MODE. */
static opt_machine_mode
@@ -2342,31 +2416,6 @@ preferred_simd_mode (scalar_mode mode)
return word_mode;
}
-/* Subroutine of riscv_vector_expand_vector_init.
- Works as follows:
- (a) Initialize TARGET by broadcasting element NELTS_REQD - 1 of BUILDER.
- (b) Skip leading elements from BUILDER, which are the same as
- element NELTS_REQD - 1.
- (c) Insert earlier elements in reverse order in TARGET using vslide1down. */
-
-static void
-expand_vector_init_insert_elems (rtx target, const rvv_builder &builder,
- int nelts_reqd)
-{
- machine_mode mode = GET_MODE (target);
- rtx dup = expand_vector_broadcast (mode, builder.elt (0));
- emit_move_insn (target, dup);
- int ndups = builder.count_dups (0, nelts_reqd - 1, 1);
- for (int i = ndups; i < nelts_reqd; i++)
- {
- unsigned int unspec
- = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1DOWN : UNSPEC_VSLIDE1DOWN;
- insn_code icode = code_for_pred_slide (unspec, mode);
- rtx ops[] = {target, target, builder.elt (i)};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- }
-}
-
/* Use merge approach to initialize the vector with repeating sequence.
v = {a, b, a, b, a, b, a, b}.
@@ -2491,47 +2540,6 @@ expand_vector_init_merge_combine_sequence (rtx target,
emit_vlmax_insn (icode, MERGE_OP, merge_ops);
}
-/* Subroutine of expand_vec_init to handle case
- when all trailing elements of builder are same.
- This works as follows:
- (a) Use expand_insn interface to broadcast last vector element in TARGET.
- (b) Insert remaining elements in TARGET using insr.
-
- ??? The heuristic used is to do above if number of same trailing elements
- is greater than leading_ndups, loosely based on
- heuristic from mostly_zeros_p. May need fine-tuning. */
-
-static bool
-expand_vector_init_trailing_same_elem (rtx target,
- const rtx_vector_builder &builder,
- int nelts_reqd)
-{
- int leading_ndups = builder.count_dups (0, nelts_reqd - 1, 1);
- int trailing_ndups = builder.count_dups (nelts_reqd - 1, -1, -1);
- machine_mode mode = GET_MODE (target);
-
- if (trailing_ndups > leading_ndups)
- {
- rtx dup = expand_vector_broadcast (mode, builder.elt (nelts_reqd - 1));
- for (int i = nelts_reqd - trailing_ndups - 1; i >= 0; i--)
- {
- unsigned int unspec
- = FLOAT_MODE_P (mode) ? UNSPEC_VFSLIDE1UP : UNSPEC_VSLIDE1UP;
- insn_code icode = code_for_pred_slide (unspec, mode);
- rtx tmp = gen_reg_rtx (mode);
- rtx ops[] = {tmp, dup, builder.elt (i)};
- emit_vlmax_insn (icode, BINARY_OP, ops);
- /* slide1up need source and dest to be different REG. */
- dup = tmp;
- }
-
- emit_move_insn (target, dup);
- return true;
- }
-
- return false;
-}
-
/* Initialize register TARGET from the elements in PARALLEL rtx VALS. */
void
@@ -3825,6 +3833,58 @@ expand_load_store (rtx *ops, bool is_load)
}
}
+/* Expand MASK_LEN_STRIDED_LOAD. */
+void
+expand_strided_load (machine_mode mode, rtx *ops)
+{
+ rtx v_reg = ops[0];
+ rtx base = ops[1];
+ rtx stride = ops[2];
+ rtx mask = ops[3];
+ rtx len = ops[4];
+ poly_int64 len_val;
+
+ insn_code icode = code_for_pred_strided_load (mode);
+ rtx emit_ops[] = {v_reg, mask, gen_rtx_MEM (mode, base), stride};
+
+ if (poly_int_rtx_p (len, &len_val)
+ && known_eq (len_val, GET_MODE_NUNITS (mode)))
+ emit_vlmax_insn (icode, BINARY_OP_TAMA, emit_ops);
+ else
+ {
+ len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+ emit_nonvlmax_insn (icode, BINARY_OP_TAMA, emit_ops, len);
+ }
+}
+
+/* Expand MASK_LEN_STRIDED_STORE. */
+void
+expand_strided_store (machine_mode mode, rtx *ops)
+{
+ rtx v_reg = ops[2];
+ rtx base = ops[0];
+ rtx stride = ops[1];
+ rtx mask = ops[3];
+ rtx len = ops[4];
+ poly_int64 len_val;
+ rtx vl_type;
+
+ if (poly_int_rtx_p (len, &len_val)
+ && known_eq (len_val, GET_MODE_NUNITS (mode)))
+ {
+ len = gen_reg_rtx (Pmode);
+ emit_vlmax_vsetvl (mode, len);
+ vl_type = get_avl_type_rtx (VLMAX);
+ }
+ else
+ {
+ len = satisfies_constraint_K (len) ? len : force_reg (Pmode, len);
+ vl_type = get_avl_type_rtx (NONVLMAX);
+ }
+
+ emit_insn (gen_pred_strided_store (mode, gen_rtx_MEM (mode, base),
+ mask, stride, v_reg, len, vl_type));
+}
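For reference, a scalar model of what the strided access expands to, as a minimal sketch (the element type, the byte interpretation of the stride, and the names are assumptions here):

#include <stdint.h>

/* Rough scalar equivalent of MASK_LEN_STRIDED_LOAD on int32_t elements:
   active elements below LEN load from BASE + I * STRIDE; the store case
   is symmetric.  Tail/inactive elements follow the insn's mask policy.  */
static void
strided_load_model (int32_t *dest, const char *base, intptr_t stride,
                    const uint8_t *mask, int len)
{
  for (int i = 0; i < len; i++)
    if (mask[i])
      dest[i] = *(const int32_t *) (base + i * stride);
}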
/* Return true if the operation is the floating-point operation need FRM. */
static bool
@@ -4888,6 +4948,15 @@ expand_vec_usadd (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
emit_vec_binary_alu (op_0, op_1, op_2, US_PLUS, vec_mode);
}
+/* Expand the standard name ssadd<mode>3 for vector mode; we can leverage
+ the vector fixed-point single-width saturating add directly. */
+
+void
+expand_vec_ssadd (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
+{
+ emit_vec_binary_alu (op_0, op_1, op_2, SS_PLUS, vec_mode);
+}
+
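As a reminder of the semantics the expander above maps onto the RVV saturating add (vsadd), a minimal scalar sketch with SImode elements assumed:

#include <stdint.h>

/* Signed saturating add: the result is clamped instead of wrapping.  */
static int32_t
sat_add_si (int32_t a, int32_t b)
{
  int64_t t = (int64_t) a + (int64_t) b;
  if (t > INT32_MAX)
    return INT32_MAX;
  if (t < INT32_MIN)
    return INT32_MIN;
  return (int32_t) t;
}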
/* Expand the standard name usadd<mode>3 for vector mode, we can leverage
the vector fixed point vector single-width saturating add directly. */
@@ -4897,6 +4966,15 @@ expand_vec_ussub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
emit_vec_binary_alu (op_0, op_1, op_2, US_MINUS, vec_mode);
}
+/* Expand the standard name sssub<mode>3 for vector mode; we can leverage
+ the vector fixed-point single-width saturating subtract directly. */
+
+void
+expand_vec_sssub (rtx op_0, rtx op_1, rtx op_2, machine_mode vec_mode)
+{
+ emit_vec_binary_alu (op_0, op_1, op_2, SS_MINUS, vec_mode);
+}
+
/* Expand the standard name ustrunc<m><n>2 for double vector mode, like
DI => SI. we can leverage the vector fixed point vector narrowing
fixed-point clip directly. */
@@ -4913,6 +4991,22 @@ expand_vec_double_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode)
emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops);
}
+/* Expand the standard name sstrunc<m><n>2 for double vector mode, like
+ DI => SI. We can leverage the vector narrowing fixed-point clip
+ directly. */
+
+void
+expand_vec_double_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode)
+{
+ insn_code icode;
+ rtx zero = CONST0_RTX (Xmode);
+ enum unspec unspec = UNSPEC_VNCLIP;
+ rtx ops[] = {op_0, op_1, zero};
+
+ icode = code_for_pred_narrow_clip_scalar (unspec, vec_mode);
+ emit_vlmax_insn (icode, BINARY_OP_VXRM_RNU, ops);
+}
+
/* Expand the standard name ustrunc<m><n>2 for double vector mode, like
DI => HI. we can leverage the vector fixed point vector narrowing
fixed-point clip directly. */
@@ -4927,6 +5021,20 @@ expand_vec_quad_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
expand_vec_double_ustrunc (op_0, double_rtx, double_mode);
}
+/* Expand the standard name sstrunc<m><n>2 for quad vector mode, like
+ DI => HI. We can leverage the vector narrowing fixed-point clip
+ directly. */
+
+void
+expand_vec_quad_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
+ machine_mode double_mode)
+{
+ rtx double_rtx = gen_reg_rtx (double_mode);
+
+ expand_vec_double_sstrunc (double_rtx, op_1, vec_mode);
+ expand_vec_double_sstrunc (op_0, double_rtx, double_mode);
+}
+
/* Expand the standard name ustrunc<m><n>2 for double vector mode, like
DI => QI. we can leverage the vector fixed point vector narrowing
fixed-point clip directly. */
@@ -4943,6 +5051,22 @@ expand_vec_oct_ustrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
expand_vec_double_ustrunc (op_0, quad_rtx, quad_mode);
}
+/* Expand the standard name sstrunc<m><n>2 for oct vector mode, like
+ DI => QI. We can leverage the vector narrowing fixed-point clip
+ directly. */
+
+void
+expand_vec_oct_sstrunc (rtx op_0, rtx op_1, machine_mode vec_mode,
+ machine_mode double_mode, machine_mode quad_mode)
+{
+ rtx double_rtx = gen_reg_rtx (double_mode);
+ rtx quad_rtx = gen_reg_rtx (quad_mode);
+
+ expand_vec_double_sstrunc (double_rtx, op_1, vec_mode);
+ expand_vec_double_sstrunc (quad_rtx, double_rtx, double_mode);
+ expand_vec_double_sstrunc (op_0, quad_rtx, quad_mode);
+}
+
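The oct variant simply chains three double-narrowing steps (DI -> SI -> HI -> QI). For reference, a minimal scalar sketch of the signed saturating truncation each vnclip step performs, shown end-to-end DI -> QI (names are illustrative):

#include <stdint.h>

/* Signed saturating truncation: out-of-range values are clamped to the
   narrower type's limits rather than reduced modulo 2^8.  */
static int8_t
sat_trunc_di_qi (int64_t x)
{
  if (x > INT8_MAX)
    return INT8_MAX;
  if (x < INT8_MIN)
    return INT8_MIN;
  return (int8_t) x;
}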
/* Vectorize popcount by the Wilkes-Wheeler-Gill algorithm that libgcc uses as
well. */
void
diff --git a/gcc/config/riscv/riscv-v.h b/gcc/config/riscv/riscv-v.h
new file mode 100644
index 0000000..e7b095f
--- /dev/null
+++ b/gcc/config/riscv/riscv-v.h
@@ -0,0 +1,90 @@
+/* Subroutines used for code generation for RISC-V 'V' Extension for
+ GNU compiler.
+ Copyright (C) 2022-2024 Free Software Foundation, Inc.
+ Contributed by Juzhe Zhong (juzhe.zhong@rivai.ai), RiVAI Technologies Ltd.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ GCC is distributed in the hope that it will be useful, but
+ WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ General Public License for more details.
+
+ You should have received a copy of the GNU General Public License
+ along with GCC; see the file COPYING3. If not see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef GCC_RISCV_V_H
+#define GCC_RISCV_V_H
+
+#include "rtx-vector-builder.h"
+
+using namespace riscv_vector;
+
+namespace riscv_vector {
+
+extern machine_mode get_mask_mode (machine_mode);
+extern opt_machine_mode get_vector_mode (scalar_mode, poly_uint64);
+
+class rvv_builder : public rtx_vector_builder
+{
+public:
+ rvv_builder () : rtx_vector_builder () {}
+ rvv_builder (machine_mode mode, unsigned int npatterns,
+ unsigned int nelts_per_pattern)
+ : rtx_vector_builder (mode, npatterns, nelts_per_pattern)
+ {
+ m_inner_mode = GET_MODE_INNER (mode);
+ m_inner_bits_size = GET_MODE_BITSIZE (m_inner_mode);
+ m_inner_bytes_size = GET_MODE_SIZE (m_inner_mode);
+ m_mask_mode = get_mask_mode (mode);
+
+ gcc_assert (
+ int_mode_for_size (inner_bits_size (), 0).exists (&m_inner_int_mode));
+ m_int_mode
+ = get_vector_mode (m_inner_int_mode, GET_MODE_NUNITS (mode)).require ();
+ }
+
+ bool can_duplicate_repeating_sequence_p ();
+ bool is_repeating_sequence ();
+ rtx get_merged_repeating_sequence ();
+
+ bool repeating_sequence_use_merge_profitable_p ();
+ bool combine_sequence_use_slideup_profitable_p ();
+ bool combine_sequence_use_merge_profitable_p ();
+ rtx get_merge_scalar_mask (unsigned int, machine_mode) const;
+
+ bool single_step_npatterns_p () const;
+ bool npatterns_all_equal_p () const;
+ bool interleaved_stepped_npatterns_p () const;
+ bool npatterns_vid_diff_repeated_p () const;
+
+ machine_mode new_mode () const { return m_new_mode; }
+ scalar_mode inner_mode () const { return m_inner_mode; }
+ scalar_int_mode inner_int_mode () const { return m_inner_int_mode; }
+ machine_mode mask_mode () const { return m_mask_mode; }
+ machine_mode int_mode () const { return m_int_mode; }
+ unsigned int inner_bits_size () const { return m_inner_bits_size; }
+ unsigned int inner_bytes_size () const { return m_inner_bytes_size; }
+
+private:
+ scalar_mode m_inner_mode;
+ scalar_int_mode m_inner_int_mode;
+ machine_mode m_new_mode;
+ scalar_int_mode m_new_inner_mode;
+ machine_mode m_mask_mode;
+ machine_mode m_int_mode;
+ unsigned int m_inner_bits_size;
+ unsigned int m_inner_bytes_size;
+};
+
+extern bool valid_vec_immediate_p (rtx);
+
+} // namespace riscv_vector
+
+#endif // GCC_RISCV_V_H
diff --git a/gcc/config/riscv/riscv-vector-builtins-bases.cc b/gcc/config/riscv/riscv-vector-builtins-bases.cc
index 193392f..b8c337f 100644
--- a/gcc/config/riscv/riscv-vector-builtins-bases.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-bases.cc
@@ -1753,6 +1753,8 @@ public:
rtx expand (function_expander &e) const override
{
+ if (!e.target)
+ return NULL_RTX;
tree arg = CALL_EXPR_ARG (e.exp, 0);
rtx src = expand_normal (arg);
emit_move_insn (gen_lowpart (e.vector_mode (), e.target), src);
@@ -1767,6 +1769,8 @@ public:
rtx expand (function_expander &e) const override
{
+ if (!e.target)
+ return NULL_RTX;
rtx src = expand_normal (CALL_EXPR_ARG (e.exp, 0));
emit_move_insn (e.target, gen_lowpart (GET_MODE (e.target), src));
return e.target;
@@ -2247,7 +2251,7 @@ public:
{
return (CODE == CLZ || CODE == CTZ) ? false : true;
}
-
+
rtx expand (function_expander &e) const override
{
switch (e.op_info->op)
diff --git a/gcc/config/riscv/riscv-vector-builtins-shapes.cc b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
index 3339541..22cbbc2 100644
--- a/gcc/config/riscv/riscv-vector-builtins-shapes.cc
+++ b/gcc/config/riscv/riscv-vector-builtins-shapes.cc
@@ -1268,7 +1268,7 @@ struct crypto_vv_no_op_type_def : public build_base
if (overloaded_p && !instance.base->can_be_overloaded_p (instance.pred))
return nullptr;
b.append_base_name (instance.base_name);
-
+
if (!overloaded_p)
{
b.append_name (operand_suffixes[instance.op_info->op]);
diff --git a/gcc/config/riscv/riscv-vector-builtins.cc b/gcc/config/riscv/riscv-vector-builtins.cc
index 41730c4..458d9b0 100644
--- a/gcc/config/riscv/riscv-vector-builtins.cc
+++ b/gcc/config/riscv/riscv-vector-builtins.cc
@@ -20,6 +20,7 @@
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/riscv/riscv-vector-costs.cc b/gcc/config/riscv/riscv-vector-costs.cc
index a80e167..67b9e3e 100644
--- a/gcc/config/riscv/riscv-vector-costs.cc
+++ b/gcc/config/riscv/riscv-vector-costs.cc
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#define INCLUDE_STRING
#include "config.h"
#include "system.h"
@@ -193,7 +194,7 @@ compute_local_program_points (
/* Collect the stmts that is vectorized and mark their program point. */
for (i = 0; i < nbbs; i++)
{
- int point = 1;
+ unsigned int point = 1;
basic_block bb = bbs[i];
vec<stmt_point> program_points = vNULL;
if (dump_enabled_p ())
@@ -488,9 +489,15 @@ max_number_of_live_regs (loop_vec_info loop_vinfo, const basic_block bb,
pair live_range = (*iter).second;
for (i = live_range.first + 1; i <= live_range.second; i++)
{
- machine_mode mode = TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE
- ? BImode
- : TYPE_MODE (TREE_TYPE (var));
+ machine_mode mode;
+ if (TREE_CODE (TREE_TYPE (var)) == BOOLEAN_TYPE)
+ mode = BImode;
+ /* Constants do not have a mode; just use the biggest one so that
+ compute_nregs will return 1. */
+ else if (TREE_CODE (var) == INTEGER_CST)
+ mode = biggest_mode;
+ else
+ mode = TYPE_MODE (TREE_TYPE (var));
unsigned int nregs
= compute_nregs_for_mode (loop_vinfo, mode, biggest_mode, lmul);
live_vars_vec[i] += nregs;
diff --git a/gcc/config/riscv/riscv-vsetvl.cc b/gcc/config/riscv/riscv-vsetvl.cc
index 017efa8..0b53b20 100644
--- a/gcc/config/riscv/riscv-vsetvl.cc
+++ b/gcc/config/riscv/riscv-vsetvl.cc
@@ -63,6 +63,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
#define INCLUDE_ALGORITHM
#define INCLUDE_FUNCTIONAL
+#define INCLUDE_MEMORY
#define INCLUDE_ARRAY
#include "config.h"
@@ -1002,6 +1003,9 @@ public:
void parse_insn (insn_info *insn)
{
+ /* The VL dest of the insn. */
+ rtx dest_vl = NULL_RTX;
+
m_insn = insn;
m_bb = insn->bb ();
/* Return if it is debug insn for the consistency with optimize == 0. */
@@ -1035,7 +1039,10 @@ public:
if (m_avl)
{
if (vsetvl_insn_p (insn->rtl ()) || has_vlmax_avl ())
- m_vl = ::get_vl (insn->rtl ());
+ {
+ m_vl = ::get_vl (insn->rtl ());
+ dest_vl = m_vl;
+ }
if (has_nonvlmax_reg_avl ())
m_avl_def = find_access (insn->uses (), REGNO (m_avl))->def ();
@@ -1132,22 +1139,22 @@ public:
}
/* Determine if dest operand(vl) has been used by non-RVV instructions. */
- if (has_vl ())
+ if (dest_vl)
{
const hash_set<use_info *> vl_uses
- = get_all_real_uses (get_insn (), REGNO (get_vl ()));
+ = get_all_real_uses (get_insn (), REGNO (dest_vl));
for (use_info *use : vl_uses)
{
gcc_assert (use->insn ()->is_real ());
rtx_insn *rinsn = use->insn ()->rtl ();
if (!has_vl_op (rinsn)
- || count_regno_occurrences (rinsn, REGNO (get_vl ())) != 1)
+ || count_regno_occurrences (rinsn, REGNO (dest_vl)) != 1)
{
m_vl_used_by_non_rvv_insn = true;
break;
}
rtx avl = ::get_avl (rinsn);
- if (!avl || !REG_P (avl) || REGNO (get_vl ()) != REGNO (avl))
+ if (!avl || !REG_P (avl) || REGNO (dest_vl) != REGNO (avl))
{
m_vl_used_by_non_rvv_insn = true;
break;
@@ -2790,6 +2797,9 @@ pre_vsetvl::fuse_local_vsetvl_info ()
curr_info.dump (dump_file, " ");
}
m_dem.merge (prev_info, curr_info);
+ if (!curr_info.vl_used_by_non_rvv_insn_p ()
+ && vsetvl_insn_p (curr_info.get_insn ()->rtl ()))
+ m_delete_list.safe_push (curr_info);
if (curr_info.get_read_vl_insn ())
prev_info.set_read_vl_insn (curr_info.get_read_vl_insn ());
if (dump_file && (dump_flags & TDF_DETAILS))
diff --git a/gcc/config/riscv/riscv.cc b/gcc/config/riscv/riscv.cc
index 90a6e93..2e9ac28 100644
--- a/gcc/config/riscv/riscv.cc
+++ b/gcc/config/riscv/riscv.cc
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#define INCLUDE_STRING
#include "config.h"
#include "system.h"
@@ -75,6 +76,7 @@ along with GCC; see the file COPYING3. If not see
#include "gcse.h"
#include "tree-dfa.h"
#include "target-globals.h"
+#include "riscv-v.h"
/* This file should be included last. */
#include "target-def.h"
@@ -293,6 +295,9 @@ struct riscv_tune_param
bool overlap_op_by_pieces;
unsigned int fusible_ops;
const struct cpu_vector_cost *vec_costs;
+ const char *function_align;
+ const char *jump_align;
+ const char *loop_align;
};
@@ -452,6 +457,9 @@ static const struct riscv_tune_param rocket_tune_info = {
false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
};
/* Costs to use when optimizing for Sifive 7 Series. */
@@ -471,6 +479,9 @@ static const struct riscv_tune_param sifive_7_tune_info = {
false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
};
/* Costs to use when optimizing for Sifive p400 Series. */
@@ -490,6 +501,9 @@ static const struct riscv_tune_param sifive_p400_tune_info = {
false, /* overlap_op_by_pieces */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
&generic_vector_cost, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
};
/* Costs to use when optimizing for Sifive p600 Series. */
@@ -509,6 +523,9 @@ static const struct riscv_tune_param sifive_p600_tune_info = {
false, /* overlap_op_by_pieces */
RISCV_FUSE_LUI_ADDI | RISCV_FUSE_AUIPC_ADDI, /* fusible_ops */
&generic_vector_cost, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
};
/* Costs to use when optimizing for T-HEAD c906. */
@@ -528,6 +545,9 @@ static const struct riscv_tune_param thead_c906_tune_info = {
false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
};
/* Costs to use when optimizing for xiangshan nanhu. */
@@ -547,6 +567,9 @@ static const struct riscv_tune_param xiangshan_nanhu_tune_info = {
false, /* overlap_op_by_pieces */
RISCV_FUSE_ZEXTW | RISCV_FUSE_ZEXTH, /* fusible_ops */
NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
};
/* Costs to use when optimizing for a generic ooo profile. */
@@ -566,6 +589,9 @@ static const struct riscv_tune_param generic_ooo_tune_info = {
true, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
&generic_vector_cost, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
};
/* Costs to use when optimizing for size. */
@@ -585,6 +611,9 @@ static const struct riscv_tune_param optimize_size_tune_info = {
false, /* overlap_op_by_pieces */
RISCV_FUSE_NOTHING, /* fusible_ops */
NULL, /* vector cost */
+ NULL, /* function_align */
+ NULL, /* jump_align */
+ NULL, /* loop_align */
};
static bool riscv_avoid_shrink_wrapping_separate ();
@@ -1229,6 +1258,152 @@ riscv_build_integer (struct riscv_integer_op *codes, HOST_WIDE_INT value,
}
}
+ else if (cost > 4 && TARGET_64BIT && can_create_pseudo_p ()
+ && allow_new_pseudos)
+ {
+ struct riscv_integer_op alt_codes[RISCV_MAX_INTEGER_OPS];
+ int alt_cost;
+
+ unsigned HOST_WIDE_INT loval = value & 0xffffffff;
+ unsigned HOST_WIDE_INT hival = (value & ~loval) >> 32;
+ bool bit31 = (loval & 0x80000000) != 0;
+ int trailing_shift = ctz_hwi (loval) - ctz_hwi (hival);
+ int leading_shift = clz_hwi (loval) - clz_hwi (hival);
+ int shiftval = 0;
+
+ /* Adjust the shift into the high half accordingly. */
+ if ((trailing_shift > 0 && hival == (loval >> trailing_shift)))
+ shiftval = 32 - trailing_shift;
+ else if ((leading_shift > 0 && hival == (loval << leading_shift)))
+ shiftval = 32 + leading_shift;
+
+ if (shiftval && !bit31)
+ alt_cost = 2 + riscv_build_integer_1 (alt_codes, sext_hwi (loval, 32),
+ mode);
+
+ /* For constants where the upper half is a shift of the lower half we
+ can do a shift followed by an or. */
+ if (shiftval && !bit31 && alt_cost < cost)
+ {
+ /* We need to save the first constant we build. */
+ alt_codes[alt_cost - 3].save_temporary = true;
+
+ /* Now we want to shift the previously generated constant into the
+ high half. */
+ alt_codes[alt_cost - 2].code = ASHIFT;
+ alt_codes[alt_cost - 2].value = shiftval;
+ alt_codes[alt_cost - 2].use_uw = false;
+ alt_codes[alt_cost - 2].save_temporary = false;
+
+ /* And the final step, IOR the two halves together. Since this uses
+ the saved temporary, use CONCAT similar to what we do for Zbkb. */
+ alt_codes[alt_cost - 1].code = CONCAT;
+ alt_codes[alt_cost - 1].value = 0;
+ alt_codes[alt_cost - 1].use_uw = false;
+ alt_codes[alt_cost - 1].save_temporary = false;
+
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+ }
+
+ if (cost > 4 && !bit31 && TARGET_ZBA)
+ {
+ int value = 0;
+
+ /* Check for a shNadd. */
+ if (hival == loval * 3)
+ value = 3;
+ else if (hival == loval * 5)
+ value = 5;
+ else if (hival == loval * 9)
+ value = 9;
+
+ if (value)
+ alt_cost = 2 + riscv_build_integer_1 (alt_codes,
+ sext_hwi (loval, 32), mode);
+
+ /* For constants where the upper half is a shNadd of the lower half
+ we can do a similar transformation. */
+ if (value && alt_cost < cost)
+ {
+ alt_codes[alt_cost - 3].save_temporary = true;
+ alt_codes[alt_cost - 2].code = FMA;
+ alt_codes[alt_cost - 2].value = value;
+ alt_codes[alt_cost - 2].use_uw = false;
+ alt_codes[alt_cost - 2].save_temporary = false;
+ alt_codes[alt_cost - 1].code = CONCAT;
+ alt_codes[alt_cost - 1].value = 0;
+ alt_codes[alt_cost - 1].use_uw = false;
+ alt_codes[alt_cost - 1].save_temporary = false;
+
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+ }
+ }
+
+ if (cost > 4 && !bit31)
+ {
+ int value = hival - loval;
+
+ /* For constants where the halves differ by less than 2048 we can
+ generate the upper half by using an addi on the lower half, then
+ a shift by 32 followed by an or. */
+ if (IN_RANGE (value, -2048, 2047))
+ {
+ alt_cost = 3 + riscv_build_integer_1 (alt_codes,
+ sext_hwi (loval, 32), mode);
+ if (alt_cost < cost)
+ {
+ alt_codes[alt_cost - 4].save_temporary = true;
+ alt_codes[alt_cost - 3].code = PLUS;
+ alt_codes[alt_cost - 3].value = value;
+ alt_codes[alt_cost - 3].use_uw = false;
+ alt_codes[alt_cost - 3].save_temporary = false;
+ alt_codes[alt_cost - 2].code = ASHIFT;
+ alt_codes[alt_cost - 2].value = 32;
+ alt_codes[alt_cost - 2].use_uw = false;
+ alt_codes[alt_cost - 2].save_temporary = false;
+ alt_codes[alt_cost - 1].code = CONCAT;
+ alt_codes[alt_cost - 1].value = 0;
+ alt_codes[alt_cost - 1].use_uw = false;
+ alt_codes[alt_cost - 1].save_temporary = false;
+
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+ }
+ }
+ }
+
+ if (cost > 5 && !bit31)
+ {
+ /* For constants where the upper half is the lower half inverted, we
+ can flip it with an xor and do a shift by 32 followed by an or. */
+ if (hival == (~loval & 0xffffffff))
+ {
+ alt_cost = 3 + riscv_build_integer_1 (alt_codes,
+ sext_hwi (loval, 32), mode);
+ if (alt_cost < cost)
+ {
+ alt_codes[alt_cost - 4].save_temporary = true;
+ alt_codes[alt_cost - 3].code = XOR;
+ alt_codes[alt_cost - 3].value = -1;
+ alt_codes[alt_cost - 3].use_uw = false;
+ alt_codes[alt_cost - 3].save_temporary = false;
+ alt_codes[alt_cost - 2].code = ASHIFT;
+ alt_codes[alt_cost - 2].value = 32;
+ alt_codes[alt_cost - 2].use_uw = false;
+ alt_codes[alt_cost - 2].save_temporary = false;
+ alt_codes[alt_cost - 1].code = CONCAT;
+ alt_codes[alt_cost - 1].value = 0;
+ alt_codes[alt_cost - 1].use_uw = false;
+ alt_codes[alt_cost - 1].save_temporary = false;
+
+ memcpy (codes, alt_codes, sizeof (alt_codes));
+ cost = alt_cost;
+ }
+ }
+ }
+ }
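A minimal standalone check of the identity the "halves differ by less than 2048" case relies on; the constant is hypothetical and the instruction names in the comments only describe the expected lowering:

#include <stdint.h>
#include <assert.h>

int
main (void)
{
  uint64_t value = 0x1234567912345678ull;            /* hypothetical constant */
  uint64_t loval = value & 0xffffffff;               /* 0x12345678, bit 31 clear */
  uint64_t hival = value >> 32;                      /* 0x12345679 */
  int64_t diff = (int64_t) hival - (int64_t) loval;  /* 1, fits an addi */
  assert (diff >= -2048 && diff <= 2047);
  uint64_t lo = loval;                               /* lui + addi */
  uint64_t hi = (lo + diff) << 32;                   /* addi; slli 32 */
  assert ((hi | lo) == value);                       /* final or/add */
  return 0;
}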
return cost;
}
@@ -1240,18 +1415,20 @@ static int
riscv_split_integer_cost (HOST_WIDE_INT val)
{
int cost;
- unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
- unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
+ unsigned HOST_WIDE_INT loval = val & 0xffffffff;
+ unsigned HOST_WIDE_INT hival = (val & ~loval) >> 32;
struct riscv_integer_op codes[RISCV_MAX_INTEGER_OPS];
/* This routine isn't used by pattern conditions, so whether or
not to allow new pseudos can be a function of where we are in the
- RTL pipeline. We shouldn't need scratch pseudos for this case
- anyway. */
+ RTL pipeline. */
bool allow_new_pseudos = can_create_pseudo_p ();
cost = 2 + riscv_build_integer (codes, loval, VOIDmode, allow_new_pseudos);
if (loval != hival)
cost += riscv_build_integer (codes, hival, VOIDmode, allow_new_pseudos);
+ else if ((loval & 0x80000000) != 0)
+ cost = 3 + riscv_build_integer (codes, ~loval & 0xffffffff,
+ VOIDmode, allow_new_pseudos);
return cost;
}
@@ -1274,11 +1451,16 @@ riscv_integer_cost (HOST_WIDE_INT val, bool allow_new_pseudos)
static rtx
riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
{
- unsigned HOST_WIDE_INT loval = sext_hwi (val, 32);
- unsigned HOST_WIDE_INT hival = sext_hwi ((val - loval) >> 32, 32);
+ unsigned HOST_WIDE_INT loval = val & 0xffffffff;
+ unsigned HOST_WIDE_INT hival = (val & ~loval) >> 32;
rtx hi = gen_reg_rtx (mode), lo = gen_reg_rtx (mode);
+ rtx x = gen_reg_rtx (mode);
+ bool eq_neg = (loval == hival) && ((loval & 0x80000000) != 0);
- riscv_move_integer (lo, lo, loval, mode);
+ if (eq_neg)
+ riscv_move_integer (lo, lo, ~loval & 0xffffffff, mode);
+ else
+ riscv_move_integer (lo, lo, loval, mode);
if (loval == hival)
hi = gen_rtx_ASHIFT (mode, lo, GEN_INT (32));
@@ -1289,7 +1471,13 @@ riscv_split_integer (HOST_WIDE_INT val, machine_mode mode)
}
hi = force_reg (mode, hi);
- return gen_rtx_PLUS (mode, hi, lo);
+ x = gen_rtx_PLUS (mode, hi, lo);
+ if (eq_neg)
+ {
+ x = force_reg (mode, x);
+ x = gen_rtx_XOR (mode, x, GEN_INT (-1));
+ }
+ return x;
}
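And a similar standalone check of the identity behind the new eq_neg path (identical halves with bit 31 set); again the constant is hypothetical:

#include <stdint.h>
#include <assert.h>

int
main (void)
{
  uint64_t val = 0x8000123480001234ull;              /* loval == hival, bit 31 set */
  uint64_t lo = ~(val & 0xffffffff) & 0xffffffff;    /* build ~loval instead */
  uint64_t x = (lo << 32) + lo;                      /* slli 32; add */
  assert ((x ^ ~0ull) == val);                       /* final xor -1 */
  return 0;
}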
/* Return true if X is a thread-local symbol. */
@@ -2142,18 +2330,13 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
...etc. */
if (riscv_v_ext_mode_p (GET_MODE (x)))
{
- /* const series vector. */
- rtx base, step;
- if (const_vec_series_p (x, &base, &step))
- {
- /* This is not accurate, we will need to adapt the COST
- * accurately according to BASE && STEP. */
- return 1;
- }
-
rtx elt;
if (const_vec_duplicate_p (x, &elt))
{
+ if (GET_MODE_CLASS (GET_MODE (x)) == MODE_VECTOR_BOOL)
+ /* Duplicate values of 0/1 can be emitted using vmv.v.i. */
+ return 1;
+
/* We don't allow CONST_VECTOR for DI vector on RV32
system since the ELT constant value can not held
within a single register to disable reload a DI
@@ -2162,11 +2345,9 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
if (maybe_gt (GET_MODE_SIZE (smode), UNITS_PER_WORD)
&& !immediate_operand (elt, Pmode))
return 0;
- /* Constants from -16 to 15 can be loaded with vmv.v.i.
- The Wc0, Wc1 constraints are already covered by the
- vi constraint so we do not need to check them here
- separately. */
- if (satisfies_constraint_vi (x))
+ /* Constants in range -16 ~ 15 integer or 0.0 floating-point
+ can be emitted using vmv.v.i. */
+ if (valid_vec_immediate_p (x))
return 1;
/* Any int/FP constants can always be broadcast from a
@@ -2186,6 +2367,52 @@ riscv_const_insns (rtx x, bool allow_new_pseudos)
return 1 + 4; /*vmv.v.x + memory access. */
}
}
+
+ /* const series vector. */
+ rtx base, step;
+ if (const_vec_series_p (x, &base, &step))
+ {
+ /* This cost is not accurate; we will need to adjust it
+ according to BASE and STEP. */
+ return 1;
+ }
+
+ if (CONST_VECTOR_STEPPED_P (x))
+ {
+ /* Some cases are unhandled, so we need to construct a builder to
+ detect/allow those cases to be handled by the fallthrough
+ handler. */
+ unsigned int nelts_per_pattern = CONST_VECTOR_NELTS_PER_PATTERN (x);
+ unsigned int npatterns = CONST_VECTOR_NPATTERNS (x);
+ rvv_builder builder (GET_MODE (x), npatterns, nelts_per_pattern);
+ for (unsigned int i = 0; i < nelts_per_pattern; i++)
+ {
+ for (unsigned int j = 0; j < npatterns; j++)
+ builder.quick_push (CONST_VECTOR_ELT (x, i * npatterns + j));
+ }
+ builder.finalize ();
+
+ if (builder.single_step_npatterns_p ())
+ {
+ if (builder.npatterns_all_equal_p ())
+ {
+ /* TODO: This cost is not accurate. */
+ return 1;
+ }
+ else
+ {
+ /* TODO: This cost is not accurate. */
+ return 1;
+ }
+ }
+ else if (builder.interleaved_stepped_npatterns_p ())
+ {
+ /* TODO: This cost is not accurate. */
+ return 1;
+ }
+
+ /* Fallthrough. */
+ }
}
/* TODO: We may support more const vector in the future. */
@@ -2579,14 +2806,12 @@ riscv_legitimize_tls_address (rtx loc)
case TLS_MODEL_GLOBAL_DYNAMIC:
if (TARGET_TLSDESC)
{
- static unsigned seqno;
tp = gen_rtx_REG (Pmode, THREAD_POINTER_REGNUM);
a0 = gen_rtx_REG (Pmode, GP_ARG_FIRST);
dest = gen_reg_rtx (Pmode);
- emit_insn (gen_tlsdesc (Pmode, loc, GEN_INT (seqno)));
+ emit_insn (gen_tlsdesc (Pmode, loc));
emit_insn (gen_add3_insn (dest, a0, tp));
- seqno++;
}
else
{
@@ -2810,12 +3035,22 @@ riscv_move_integer (rtx temp, rtx dest, HOST_WIDE_INT value,
}
else if (codes[i].code == CONCAT || codes[i].code == VEC_MERGE)
{
- rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
- rtx t2 = codes[i].code == VEC_MERGE ? old_value : x;
- gcc_assert (t2);
- t2 = gen_lowpart (SImode, t2);
- emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2));
- x = t;
+ if (codes[i].code == CONCAT && !TARGET_ZBKB)
+ {
+ /* The two values should have no bits in common, so we can
+ use PLUS instead of IOR which has a higher chance of
+ using a compressed instruction. */
+ x = gen_rtx_PLUS (mode, x, old_value);
+ }
+ else
+ {
+ rtx t = can_create_pseudo_p () ? gen_reg_rtx (mode) : temp;
+ rtx t2 = codes[i].code == VEC_MERGE ? old_value : x;
+ gcc_assert (t2);
+ t2 = gen_lowpart (SImode, t2);
+ emit_insn (gen_riscv_xpack_di_si_2 (t, x, GEN_INT (32), t2));
+ x = t;
+ }
}
else
x = gen_rtx_fmt_ee (codes[i].code, mode,
@@ -3560,7 +3795,12 @@ riscv_rtx_costs (rtx x, machine_mode mode, int outer_code, int opno ATTRIBUTE_UN
if (outer_code == INSN
&& register_operand (SET_DEST (x), GET_MODE (SET_DEST (x))))
{
- riscv_rtx_costs (SET_SRC (x), mode, outer_code, opno, total, speed);
+ if (REG_P (SET_SRC (x)) && TARGET_DOUBLE_FLOAT && mode == DFmode)
+ {
+ *total = COSTS_N_INSNS (1);
+ return true;
+ }
+ riscv_rtx_costs (SET_SRC (x), mode, SET, opno, total, speed);
return true;
}
@@ -4159,11 +4399,29 @@ riscv_noce_conversion_profitable_p (rtx_insn *seq,
riscv_if_info.original_cost += COSTS_N_INSNS (1);
riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
}
- last_dest = NULL_RTX;
+
rtx dest = SET_DEST (x);
- if (COMPARISON_P (src)
+
+ /* Do something similar for the moves that are likely to
+ turn into NOP moves by the time the register allocator is
+ done. These are also side effects of how our sCC expanders
+ work. We'll want to check and update LAST_DEST here too. */
+ if (last_dest
&& REG_P (dest)
- && GET_MODE (dest) == SImode)
+ && GET_MODE (dest) == SImode
+ && SUBREG_P (src)
+ && SUBREG_PROMOTED_VAR_P (src)
+ && REGNO (SUBREG_REG (src)) == REGNO (last_dest))
+ {
+ riscv_if_info.original_cost += COSTS_N_INSNS (1);
+ riscv_if_info.max_seq_cost += COSTS_N_INSNS (1);
+ if (last_dest)
+ last_dest = dest;
+ }
+ else
+ last_dest = NULL_RTX;
+
+ if (COMPARISON_P (src) && REG_P (dest))
last_dest = dest;
}
else
@@ -4845,13 +5103,31 @@ riscv_expand_int_scc (rtx target, enum rtx_code code, rtx op0, rtx op1, bool *in
riscv_extend_comparands (code, &op0, &op1);
op0 = force_reg (word_mode, op0);
+ /* For sub-word targets on rv64, do the computation in DImode, then
+ extract the lowpart for the final target, marking it as sign
+ extended.  Note that it's also properly zero extended, but it's
+ probably more profitable to expose it as sign extended. */
+ rtx t;
+ if (TARGET_64BIT && GET_MODE (target) == SImode)
+ t = gen_reg_rtx (DImode);
+ else
+ t = target;
+
if (code == EQ || code == NE)
{
rtx zie = riscv_zero_if_equal (op0, op1);
- riscv_emit_binary (code, target, zie, const0_rtx);
+ riscv_emit_binary (code, t, zie, const0_rtx);
}
else
- riscv_emit_int_order_test (code, invert_ptr, target, op0, op1);
+ riscv_emit_int_order_test (code, invert_ptr, t, op0, op1);
+
+ if (t != target)
+ {
+ t = gen_lowpart (SImode, t);
+ SUBREG_PROMOTED_VAR_P (t) = 1;
+ SUBREG_PROMOTED_SET (t, SRP_SIGNED);
+ emit_move_insn (target, t);
+ }
}
/* Like riscv_expand_int_scc, but for floating-point comparisons. */
@@ -6373,8 +6649,8 @@ riscv_union_memmodels (enum memmodel model1, enum memmodel model2)
model1 = memmodel_base (model1);
model2 = memmodel_base (model2);
- enum memmodel weaker = model1 <= model2 ? model1: model2;
- enum memmodel stronger = model1 > model2 ? model1: model2;
+ enum memmodel weaker = model1 <= model2 ? model1 : model2;
+ enum memmodel stronger = model1 > model2 ? model1 : model2;
switch (stronger)
{
@@ -7405,6 +7681,73 @@ riscv_compute_frame_info (void)
/* Next points the incoming stack pointer and any incoming arguments. */
}
+/* Implement TARGET_CAN_INLINE_P.  Determine whether inlining the function
+   CALLEE into the function CALLER is safe.  Unless CALLEE carries the
+   always_inline attribute, inlining is rejected when the non-ISA target
+   options (the code model, TLS dialect, stack protector settings, etc.)
+   differ.  Inlining is permitted when those options are identical and the
+   ISA extensions of CALLEE are a subset of those of CALLER, which helps
+   the performance of Function Multi-Versioning. */
+
+static bool
+riscv_can_inline_p (tree caller, tree callee)
+{
+ /* Do not inline when callee is versioned but caller is not. */
+ if (DECL_FUNCTION_VERSIONED (callee) && ! DECL_FUNCTION_VERSIONED (caller))
+ return false;
+
+ tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
+ tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
+
+ /* It's safe to inline if the callee has no target options. */
+ if (! callee_tree)
+ return true;
+
+ if (! caller_tree)
+ caller_tree = target_option_default_node;
+
+ struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
+ struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
+
+ int isa_flag_mask = riscv_x_target_flags_isa_mask ();
+
+ /* Callee and caller should have the same target options except for ISA. */
+ int callee_target_flags = callee_opts->x_target_flags & ~isa_flag_mask;
+ int caller_target_flags = caller_opts->x_target_flags & ~isa_flag_mask;
+
+ if (callee_target_flags != caller_target_flags)
+ return false;
+
+ /* Callee's ISA should be a subset of the caller's ISA. */
+ if (! riscv_ext_is_subset (caller_opts, callee_opts))
+ return false;
+
+ /* If the callee has always_inline set, we can ignore the remaining options. */
+ if (lookup_attribute ("always_inline", DECL_ATTRIBUTES (callee)))
+ return true;
+
+ if (caller_opts->x_riscv_cmodel != callee_opts->x_riscv_cmodel)
+ return false;
+
+ if (caller_opts->x_riscv_tls_dialect != callee_opts->x_riscv_tls_dialect)
+ return false;
+
+ if (caller_opts->x_riscv_stack_protector_guard_reg
+ != callee_opts->x_riscv_stack_protector_guard_reg)
+ return false;
+
+ if (caller_opts->x_riscv_stack_protector_guard_offset
+ != callee_opts->x_riscv_stack_protector_guard_offset)
+ return false;
+
+ if (caller_opts->x_rvv_vector_strict_align
+ != callee_opts->x_rvv_vector_strict_align)
+ return false;
+
+ return true;
+}
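
As a rough illustration of the subset rule above, consider two functions compiled with per-function target options.  This is a hypothetical sketch, assuming the RISC-V __attribute__((target("arch=+ext"))) syntax is accepted by the toolchain; the function names are made up.

/* The callee's ISA (+zba) is a subset of the caller's (+zba,+zbb) and the
   non-ISA options match, so inlining add3 into use_add3 is permitted;
   inlining in the other direction would be rejected because the caller's
   ISA would not cover the callee's.  */
__attribute__ ((target ("arch=+zba")))
static inline long
add3 (long a, long b)
{
  return a + b + 3;
}

__attribute__ ((target ("arch=+zba,+zbb")))
long
use_add3 (long a, long b)
{
  return add3 (a, b);
}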
+
/* Make sure that we're not trying to eliminate to the wrong hard frame
pointer. */
@@ -9971,6 +10314,18 @@ riscv_override_options_internal (struct gcc_options *opts)
? &optimize_size_tune_info
: cpu->tune_param;
+ /* If not optimizing for size, set the default
+ alignment to what the target wants. */
+ if (!opts->x_optimize_size)
+ {
+ if (opts->x_flag_align_loops && !opts->x_str_align_loops)
+ opts->x_str_align_loops = tune_param->loop_align;
+ if (opts->x_flag_align_jumps && !opts->x_str_align_jumps)
+ opts->x_str_align_jumps = tune_param->jump_align;
+ if (opts->x_flag_align_functions && !opts->x_str_align_functions)
+ opts->x_str_align_functions = tune_param->function_align;
+ }
+
/* Use -mtune's setting for slow_unaligned_access, even when optimizing
for size. For architectures that trap and emulate unaligned accesses,
the performance cost is too great, even for -Os. Similarly, if
@@ -10630,6 +10985,17 @@ riscv_can_change_mode_class (machine_mode from, machine_mode to,
if (reg_classes_intersect_p (V_REGS, rclass)
&& !ordered_p (GET_MODE_PRECISION (from), GET_MODE_PRECISION (to)))
return false;
+
+ /* Subregs of modes larger than one vector are ambiguous.
+ A V4DImode with rv64gcv_zvl128b could, for example, span two registers/one
+ register group of two at VLEN = 128 or one register at VLEN >= 256 and
+ we cannot, statically, determine which part of it to extract.
+ Therefore prevent that. */
+ if (reg_classes_intersect_p (V_REGS, rclass)
+ && riscv_v_ext_vls_mode_p (from)
+ && !ordered_p (BITS_PER_RISCV_VECTOR, GET_MODE_PRECISION (from)))
+ return false;
+
return !reg_classes_intersect_p (FP_REGS, rclass);
}
@@ -11445,6 +11811,65 @@ riscv_frm_mode_needed (rtx_insn *cur_insn, int code)
return mode;
}
+/* If the current function needs a single VXRM mode, return it. Else
+ return VXRM_MODE_NONE.
+
+ This is called on the first insn in the chain and scans the full function
+ once to collect VXRM mode settings. If a single mode is needed, it will
+ often be better to set it once at the start of the function rather than
+ at an anticipation point. */
+static int
+singleton_vxrm_need (void)
+{
+ /* Only needed for vector code. */
+ if (!TARGET_VECTOR)
+ return VXRM_MODE_NONE;
+
+ /* If ENTRY has more than one successor, then don't optimize, just to
+ keep things simple. */
+ if (EDGE_COUNT (ENTRY_BLOCK_PTR_FOR_FN (cfun)->succs) > 1)
+ return VXRM_MODE_NONE;
+
+ /* Walk the IL noting if VXRM is needed and if there's more than one
+ mode needed. */
+ bool found = false;
+ int saved_vxrm_mode;
+ for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
+ {
+ if (!INSN_P (insn) || DEBUG_INSN_P (insn))
+ continue;
+
+ int code = recog_memoized (insn);
+ if (code < 0)
+ continue;
+
+ int vxrm_mode = get_attr_vxrm_mode (insn);
+ if (vxrm_mode == VXRM_MODE_NONE)
+ continue;
+
+ /* If this is the first VXRM need, note it. */
+ if (!found)
+ {
+ saved_vxrm_mode = vxrm_mode;
+ found = true;
+ continue;
+ }
+
+ /* Not the first VXRM need. If this is different than
+ the saved need, then we're not going to be able to
+ optimize and we can stop scanning now. */
+ if (saved_vxrm_mode != vxrm_mode)
+ return VXRM_MODE_NONE;
+
+ /* Same mode as we've seen, keep scanning. */
+ }
+
+ /* If we got here we scanned the whole function. If we found
+ some VXRM state, then we can optimize. If we didn't find
+ VXRM state, then there's nothing to optimize. */
+ return found ? saved_vxrm_mode : VXRM_MODE_NONE;
+}
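
For example, a function whose fixed-point operations all request the same rounding mode lets this scan return that single mode, so VXRM can be written once on the edge out of ENTRY.  The sketch below assumes the <riscv_vector.h> intrinsics where __riscv_vaadd takes an explicit VXRM argument; the codegen claim is an assumption.

#include <riscv_vector.h>

/* Both averaging adds use the same rounding mode (RNU), so the whole
   function has a single VXRM need and the mode can be set once at entry
   instead of before each vaadd (assumed codegen).  */
vint32m1_t
avg3 (vint32m1_t a, vint32m1_t b, vint32m1_t c, size_t vl)
{
  vint32m1_t t = __riscv_vaadd_vv_i32m1 (a, b, __RISCV_VXRM_RNU, vl);
  return __riscv_vaadd_vv_i32m1 (t, c, __RISCV_VXRM_RNU, vl);
}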
+
/* Return mode that entity must be switched into
prior to the execution of insn. */
@@ -11456,6 +11881,16 @@ riscv_mode_needed (int entity, rtx_insn *insn, HARD_REG_SET)
switch (entity)
{
case RISCV_VXRM:
+ /* If INSN is the first insn in the function, then determine if we
+ want to signal a need in ENTRY->succs to allow for aggressive
+ elimination of subsequent sets of VXRM. */
+ if (insn == get_first_nonnote_insn ())
+ {
+ int need = singleton_vxrm_need ();
+ if (need != VXRM_MODE_NONE)
+ return need;
+ }
+
return code >= 0 ? get_attr_vxrm_mode (insn) : VXRM_MODE_NONE;
case RISCV_FRM:
return riscv_frm_mode_needed (insn, code);
@@ -11842,19 +12277,56 @@ riscv_get_raw_result_mode (int regno)
return default_get_reg_raw_mode (regno);
}
-/* Generate a new rtx of Xmode based on the rtx and mode in define pattern.
- The rtx x will be zero extended to Xmode if the mode is HI/QImode, and
- the new zero extended Xmode rtx will be returned.
- Or the gen_lowpart rtx of Xmode will be returned. */
+/* Generate a REG rtx of Xmode from the given rtx and mode.
+ The rtx x can be REG (QI/HI/SI/DI) or const_int.
+ The machine_mode mode is the original mode from define pattern.
+
+ If X is a REG in Xmode, it will be returned directly.
+
+ If X is a REG in a narrower mode, a new REG of Xmode holding the
+ zero-extended value will be returned.
+
+ If X is a const_int, a new REG rtx will be created to hold the value of
+ the const_int and then returned.
+
+ According to the gccint doc, the constants generated for modes with fewer
+ bits than in HOST_WIDE_INT must be sign extended to full width. Thus there
+ will be two cases here, take QImode as example.
+
+ For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple
+ mov from const_int to the new REG rtx is good enough here.
+
+ For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand.
+ Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode
+ of RV64.  So we need to clean up the highest 56 bits of the new REG rtx
+ moved from the (const_int -2).
+
+ Then the underlying expander can perform the code generation based on
+ the REG rtx of Xmode, instead of taking care of these cases in the
+ expand function. */
static rtx
riscv_gen_zero_extend_rtx (rtx x, machine_mode mode)
{
+ rtx xmode_reg = gen_reg_rtx (Xmode);
+
+ if (!CONST_INT_P (x))
+ {
+ if (mode == Xmode)
+ return x;
+
+ riscv_emit_unary (ZERO_EXTEND, xmode_reg, x);
+ return xmode_reg;
+ }
+
if (mode == Xmode)
- return x;
+ emit_move_insn (xmode_reg, x);
+ else
+ {
+ rtx reg_x = gen_reg_rtx (mode);
- rtx xmode_reg = gen_reg_rtx (Xmode);
- riscv_emit_unary (ZERO_EXTEND, xmode_reg, x);
+ emit_move_insn (reg_x, x);
+ riscv_emit_unary (ZERO_EXTEND, xmode_reg, reg_x);
+ }
return xmode_reg;
}
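
The 0xfe vs. 0xfffffffffffffffe distinction described in the comment can be reproduced in plain C.  This is a host-side sketch, not target code.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>

int
main (void)
{
  int8_t qi = (int8_t) 254;                     /* RTL represents this as (const_int -2).  */
  uint64_t sign_ext = (uint64_t) (int64_t) qi;  /* 0xfffffffffffffffe.  */
  uint64_t zero_ext = (uint64_t) (uint8_t) qi;  /* 0xfe, the value we need.  */
  printf ("%" PRIx64 " %" PRIx64 "\n", sign_ext, zero_ext);
  return 0;
}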
@@ -11876,7 +12348,7 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
rtx xmode_sum = gen_reg_rtx (Xmode);
rtx xmode_lt = gen_reg_rtx (Xmode);
rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
- rtx xmode_y = gen_lowpart (Xmode, y);
+ rtx xmode_y = riscv_gen_zero_extend_rtx (y, mode);
rtx xmode_dest = gen_reg_rtx (Xmode);
/* Step-1: sum = x + y */
@@ -11907,48 +12379,94 @@ riscv_expand_usadd (rtx dest, rtx x, rtx y)
emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
-/* Generate a REG rtx of Xmode from the given rtx and mode.
- The rtx x can be REG (QI/HI/SI/DI) or const_int.
- The machine_mode mode is the original mode from define pattern.
-
- If rtx is REG, the gen_lowpart of Xmode will be returned.
-
- If rtx is const_int, a new REG rtx will be created to hold the value of
- const_int and then returned.
-
- According to the gccint doc, the constants generated for modes with fewer
- bits than in HOST_WIDE_INT must be sign extended to full width. Thus there
- will be two cases here, take QImode as example.
-
- For .SAT_SUB (127, y) in QImode, we have (const_int 127) and one simple
- mov from const_int to the new REG rtx is good enough here.
-
- For .SAT_SUB (254, y) in QImode, we have (const_int -2) after define_expand.
- Aka 0xfffffffffffffffe in Xmode of RV64 but we actually need 0xfe in Xmode
- of RV64. So we need to cleanup the highest 56 bits of the new REG rtx moved
- from the (const_int -2).
-
- Then the underlying expanding can perform the code generation based on
- the REG rtx of Xmode, instead of taking care of these in expand func. */
+/* Return a new const RTX holding the maximum signed value of the given
+   mode.  Only scalar integer modes are allowed. */
static rtx
-riscv_gen_unsigned_xmode_reg (rtx x, machine_mode mode)
+riscv_gen_sign_max_cst (machine_mode mode)
{
- if (!CONST_INT_P (x))
- return gen_lowpart (Xmode, x);
-
- rtx xmode_x = gen_reg_rtx (Xmode);
-
- if (mode == Xmode)
- emit_move_insn (xmode_x, x);
- else
+ switch (mode)
{
- rtx reg_x = gen_reg_rtx (mode);
- emit_move_insn (reg_x, x);
- riscv_emit_unary (ZERO_EXTEND, xmode_x, reg_x);
+ case QImode:
+ return GEN_INT (INT8_MAX);
+ case HImode:
+ return GEN_INT (INT16_MAX);
+ case SImode:
+ return GEN_INT (INT32_MAX);
+ case DImode:
+ return GEN_INT (INT64_MAX);
+ default:
+ gcc_unreachable ();
}
+}
+
+/* Implements the signed saturation add standard name ssadd for int mode.
+
+ z = SAT_ADD(x, y).
+ =>
+ 1. sum = x + y
+ 2. xor_0 = x ^ y
+ 3. xor_1 = x ^ sum
+ 4. lt = xor_1 < 0
+ 5. ge = xor_0 >= 0
+ 6. and = ge & lt
+ 7. lt = x < 0
+ 8. neg = -lt
+ 9. max = INT_MAX
+ 10. max = max ^ neg
+ 11. neg = -and
+ 12. max = max & neg
+ 13. and = and - 1
+ 14. z = sum & and
+ 15. z = z | max */
+
+void
+riscv_expand_ssadd (rtx dest, rtx x, rtx y)
+{
+ machine_mode mode = GET_MODE (dest);
+ unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+ rtx shift_bits = GEN_INT (bitsize - 1);
+ rtx xmode_x = gen_lowpart (Xmode, x);
+ rtx xmode_y = gen_lowpart (Xmode, y);
+ rtx xmode_sum = gen_reg_rtx (Xmode);
+ rtx xmode_dest = gen_reg_rtx (Xmode);
+ rtx xmode_xor_0 = gen_reg_rtx (Xmode);
+ rtx xmode_xor_1 = gen_reg_rtx (Xmode);
+ rtx xmode_ge = gen_reg_rtx (Xmode);
+ rtx xmode_lt = gen_reg_rtx (Xmode);
+ rtx xmode_neg = gen_reg_rtx (Xmode);
+ rtx xmode_and = gen_reg_rtx (Xmode);
+ rtx xmode_max = gen_reg_rtx (Xmode);
- return xmode_x;
+ /* Step-1: sum = x + y, xor_0 = x ^ y, xor_1 = x ^ sum. */
+ riscv_emit_binary (PLUS, xmode_sum, xmode_x, xmode_y);
+ riscv_emit_binary (XOR, xmode_xor_0, xmode_x, xmode_y);
+ riscv_emit_binary (XOR, xmode_xor_1, xmode_x, xmode_sum);
+
+ /* Step-2: lt = xor_1 < 0, ge = xor_0 >= 0, and = ge & lt. */
+ riscv_emit_binary (LSHIFTRT, xmode_lt, xmode_xor_1, shift_bits);
+ riscv_emit_binary (LSHIFTRT, xmode_ge, xmode_xor_0, shift_bits);
+ riscv_emit_binary (XOR, xmode_ge, xmode_ge, CONST1_RTX (Xmode));
+ riscv_emit_binary (AND, xmode_and, xmode_lt, xmode_ge);
+ riscv_emit_binary (AND, xmode_and, xmode_and, CONST1_RTX (Xmode));
+
+ /* Step-3: lt = x < 0, neg = -lt */
+ riscv_emit_binary (LT, xmode_lt, xmode_x, CONST0_RTX (Xmode));
+ riscv_emit_unary (NEG, xmode_neg, xmode_lt);
+
+ /* Step-4: max = 0x7f..., max = max ^ neg, neg = -and, max = max & neg */
+ riscv_emit_move (xmode_max, riscv_gen_sign_max_cst (mode));
+ riscv_emit_binary (XOR, xmode_max, xmode_max, xmode_neg);
+ riscv_emit_unary (NEG, xmode_neg, xmode_and);
+ riscv_emit_binary (AND, xmode_max, xmode_max, xmode_neg);
+
+ /* Step-5: and = and - 1, dest = sum & and */
+ riscv_emit_binary (PLUS, xmode_and, xmode_and, CONSTM1_RTX (Xmode));
+ riscv_emit_binary (AND, xmode_dest, xmode_sum, xmode_and);
+
+ /* Step-6: xmode_dest = xmode_dest | xmode_max, dest = xmode_dest */
+ riscv_emit_binary (IOR, xmode_dest, xmode_dest, xmode_max);
+ emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
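
The 15-step sequence above can be checked with a small host-side C model.  This is an illustrative sketch for QImode, with int64_t standing in for the Xmode scratch registers; it is not the generated code itself.

#include <assert.h>
#include <stdint.h>

static int8_t
ssadd8 (int8_t x, int8_t y)
{
  int64_t sum   = (int64_t) x + y;            /* 1. sum = x + y  */
  int64_t xor_0 = (int64_t) x ^ y;            /* 2. xor_0 = x ^ y  */
  int64_t xor_1 = (int64_t) x ^ sum;          /* 3. xor_1 = x ^ sum  */
  int64_t lt    = (xor_1 >> 7) & 1;           /* 4. result sign differs from x  */
  int64_t ge    = ((xor_0 >> 7) & 1) ^ 1;     /* 5. operands have the same sign  */
  int64_t ovf   = ge & lt;                    /* 6. signed overflow flag  */
  int64_t neg   = -(int64_t) (x < 0);         /* 7-8. all-ones if x < 0  */
  int64_t sat   = (INT8_MAX ^ neg) & -ovf;    /* 9-12. INT8_MAX or INT8_MIN, or 0  */
  int64_t keep  = sum & (ovf - 1);            /* 13-14. keep sum when no overflow  */
  return (int8_t) (keep | sat);               /* 15. merge  */
}

int
main (void)
{
  assert (ssadd8 (100, 100) == INT8_MAX);
  assert (ssadd8 (-100, -100) == INT8_MIN);
  assert (ssadd8 (50, -60) == -10);
  return 0;
}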
/* Implements the unsigned saturation sub standard name usadd for int mode.
@@ -11964,8 +12482,8 @@ void
riscv_expand_ussub (rtx dest, rtx x, rtx y)
{
machine_mode mode = GET_MODE (dest);
- rtx xmode_x = riscv_gen_unsigned_xmode_reg (x, mode);
- rtx xmode_y = gen_lowpart (Xmode, y);
+ rtx xmode_x = riscv_gen_zero_extend_rtx (x, mode);
+ rtx xmode_y = riscv_gen_zero_extend_rtx (y, mode);
rtx xmode_lt = gen_reg_rtx (Xmode);
rtx xmode_minus = gen_reg_rtx (Xmode);
rtx xmode_dest = gen_reg_rtx (Xmode);
@@ -11986,6 +12504,75 @@ riscv_expand_ussub (rtx dest, rtx x, rtx y)
emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
+/* Implements the signed saturation sub standard name sssub for int mode.
+
+ z = SAT_SUB(x, y).
+ =>
+ 1. minus = x - y
+ 2. xor_0 = x ^ y
+ 3. xor_1 = x ^ minus
+ 4. lt_0 = xor_1 < 0
+ 5. lt_1 = xor_0 < 0
+ 6. and = lt_0 & lt_1
+ 7. lt = x < 0
+ 8. neg = -lt
+ 9. max = INT_MAX
+ 10. max = max ^ neg
+ 11. neg = -and
+ 12. max = max & neg
+ 13. and = and - 1
+ 14. z = minus & and
+ 15. z = z | max */
+
+void
+riscv_expand_sssub (rtx dest, rtx x, rtx y)
+{
+ machine_mode mode = GET_MODE (dest);
+ unsigned bitsize = GET_MODE_BITSIZE (mode).to_constant ();
+ rtx shift_bits = GEN_INT (bitsize - 1);
+ rtx xmode_x = gen_lowpart (Xmode, x);
+ rtx xmode_y = gen_lowpart (Xmode, y);
+ rtx xmode_minus = gen_reg_rtx (Xmode);
+ rtx xmode_xor_0 = gen_reg_rtx (Xmode);
+ rtx xmode_xor_1 = gen_reg_rtx (Xmode);
+ rtx xmode_lt_0 = gen_reg_rtx (Xmode);
+ rtx xmode_lt_1 = gen_reg_rtx (Xmode);
+ rtx xmode_and = gen_reg_rtx (Xmode);
+ rtx xmode_lt = gen_reg_rtx (Xmode);
+ rtx xmode_neg = gen_reg_rtx (Xmode);
+ rtx xmode_max = gen_reg_rtx (Xmode);
+ rtx xmode_dest = gen_reg_rtx (Xmode);
+
+ /* Step-1: minus = x - y, xor_0 = x ^ y, xor_1 = x ^ minus. */
+ riscv_emit_binary (MINUS, xmode_minus, xmode_x, xmode_y);
+ riscv_emit_binary (XOR, xmode_xor_0, xmode_x, xmode_y);
+ riscv_emit_binary (XOR, xmode_xor_1, xmode_x, xmode_minus);
+
+ /* Step-2: and = xor_0 < 0 & xor_1 < 0. */
+ riscv_emit_binary (LSHIFTRT, xmode_lt_0, xmode_xor_0, shift_bits);
+ riscv_emit_binary (LSHIFTRT, xmode_lt_1, xmode_xor_1, shift_bits);
+ riscv_emit_binary (AND, xmode_and, xmode_lt_0, xmode_lt_1);
+ riscv_emit_binary (AND, xmode_and, xmode_and, CONST1_RTX (Xmode));
+
+ /* Step-3: lt = x < 0, neg = -lt. */
+ riscv_emit_binary (LT, xmode_lt, xmode_x, CONST0_RTX (Xmode));
+ riscv_emit_unary (NEG, xmode_neg, xmode_lt);
+
+ /* Step-4: max = 0x7f..., max = max ^ neg, neg = -and, max = max & neg. */
+ riscv_emit_move (xmode_max, riscv_gen_sign_max_cst (mode));
+ riscv_emit_binary (XOR, xmode_max, xmode_max, xmode_neg);
+ riscv_emit_unary (NEG, xmode_neg, xmode_and);
+ riscv_emit_binary (AND, xmode_max, xmode_max, xmode_neg);
+
+ /* Step-5: and = and - 1, dest = minus & and. */
+ riscv_emit_binary (PLUS, xmode_and, xmode_and, CONSTM1_RTX (Xmode));
+ riscv_emit_binary (AND, xmode_dest, xmode_minus, xmode_and);
+
+ /* Step-6: dest = dest | max. */
+ riscv_emit_binary (IOR, xmode_dest, xmode_dest, xmode_max);
+ emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
+}
+
/* Implement the unsigned saturation truncation for int mode.
b = SAT_TRUNC (a);
@@ -12026,6 +12613,67 @@ riscv_expand_ustrunc (rtx dest, rtx src)
emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
}
+/* Implement the signed saturation truncation for int mode.
+
+ b = SAT_TRUNC (a);
+ =>
+ 1. lt = a < max
+ 2. gt = min < a
+ 3. mask = lt & gt
+ 4. trunc_mask = -mask
+ 5. sat_mask = mask - 1
+ 6. lt = a < 0
+ 7. neg = -lt
+ 8. sat = neg ^ max
+ 9. trunc = src & trunc_mask
+ 10. sat = sat & sat_mask
+ 11. dest = trunc | sat */
+
+void
+riscv_expand_sstrunc (rtx dest, rtx src)
+{
+ machine_mode mode = GET_MODE (dest);
+ unsigned narrow_prec = GET_MODE_PRECISION (mode).to_constant ();
+ /* E.g. 127 and -128 for QImode.  */
+ HOST_WIDE_INT narrow_max = ((int64_t) 1 << (narrow_prec - 1)) - 1;
+ HOST_WIDE_INT narrow_min = -narrow_max - 1;
+
+ rtx xmode_narrow_max = gen_reg_rtx (Xmode);
+ rtx xmode_narrow_min = gen_reg_rtx (Xmode);
+ rtx xmode_lt = gen_reg_rtx (Xmode);
+ rtx xmode_gt = gen_reg_rtx (Xmode);
+ rtx xmode_src = gen_lowpart (Xmode, src);
+ rtx xmode_dest = gen_reg_rtx (Xmode);
+ rtx xmode_mask = gen_reg_rtx (Xmode);
+ rtx xmode_sat = gen_reg_rtx (Xmode);
+ rtx xmode_trunc = gen_reg_rtx (Xmode);
+ rtx xmode_sat_mask = gen_reg_rtx (Xmode);
+ rtx xmode_trunc_mask = gen_reg_rtx (Xmode);
+
+ /* Step-1: lt = src < max, gt = min < src, mask = lt & gt */
+ emit_move_insn (xmode_narrow_min, gen_int_mode (narrow_min, Xmode));
+ emit_move_insn (xmode_narrow_max, gen_int_mode (narrow_max, Xmode));
+ riscv_emit_binary (LT, xmode_lt, xmode_src, xmode_narrow_max);
+ riscv_emit_binary (LT, xmode_gt, xmode_narrow_min, xmode_src);
+ riscv_emit_binary (AND, xmode_mask, xmode_lt, xmode_gt);
+
+ /* Step-2: sat_mask = mask - 1, trunc_mask = -mask */
+ riscv_emit_binary (PLUS, xmode_sat_mask, xmode_mask, CONSTM1_RTX (Xmode));
+ riscv_emit_unary (NEG, xmode_trunc_mask, xmode_mask);
+
+ /* Step-3: lt = src < 0, lt = -lt, sat = lt ^ narrow_max */
+ riscv_emit_binary (LT, xmode_lt, xmode_src, CONST0_RTX (Xmode));
+ riscv_emit_unary (NEG, xmode_lt, xmode_lt);
+ riscv_emit_binary (XOR, xmode_sat, xmode_lt, xmode_narrow_max);
+
+ /* Step-4: xmode_dest = (src & trunc_mask) | (sat & sat_mask) */
+ riscv_emit_binary (AND, xmode_trunc, xmode_src, xmode_trunc_mask);
+ riscv_emit_binary (AND, xmode_sat, xmode_sat, xmode_sat_mask);
+ riscv_emit_binary (IOR, xmode_dest, xmode_trunc, xmode_sat);
+
+ /* Step-5: dest = xmode_dest */
+ emit_move_insn (dest, gen_lowpart (mode, xmode_dest));
+}
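
Again, a small host-side C model of the 11 steps above, truncating int32_t to int8_t; illustrative only.

#include <assert.h>
#include <stdint.h>

static int8_t
sstrunc8 (int32_t a)
{
  int32_t mask       = (a < INT8_MAX) & (INT8_MIN < a);  /* 1-3. value already fits  */
  int32_t trunc_mask = -mask;                            /* 4 */
  int32_t sat_mask   = mask - 1;                         /* 5 */
  int32_t sat        = -(a < 0) ^ INT8_MAX;              /* 6-8. +127 or -128  */
  return (int8_t) ((a & trunc_mask) | (sat & sat_mask)); /* 9-11 */
}

int
main (void)
{
  assert (sstrunc8 (5) == 5);
  assert (sstrunc8 (300) == INT8_MAX);
  assert (sstrunc8 (-300) == INT8_MIN);
  return 0;
}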
+
/* Implement TARGET_C_MODE_FOR_FLOATING_TYPE. Return TFmode for
TI_LONG_DOUBLE_TYPE which is for long double type, go with the
default one for the others. */
@@ -12047,6 +12695,22 @@ riscv_stack_clash_protection_alloca_probe_range (void)
return STACK_CLASH_CALLER_GUARD;
}
+static bool
+riscv_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
+ unsigned alignment,
+ enum by_pieces_operation op, bool speed_p)
+{
+ /* For set/clear with size > UNITS_PER_WORD, by-pieces uses vector
+ broadcasts with UNITS_PER_WORD-sized pieces.  Use setmem<mode> instead,
+ which can use bigger chunks. */
+ if (TARGET_VECTOR && (stringop_strategy & STRATEGY_VECTOR)
+ && (op == CLEAR_BY_PIECES || op == SET_BY_PIECES)
+ && speed_p && size > UNITS_PER_WORD)
+ return false;
+
+ return default_use_by_pieces_infrastructure_p (size, alignment, op, speed_p);
+}
+
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.half\t"
@@ -12195,6 +12859,9 @@ riscv_stack_clash_protection_alloca_probe_range (void)
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P riscv_legitimate_address_p
+#undef TARGET_CAN_INLINE_P
+#define TARGET_CAN_INLINE_P riscv_can_inline_p
+
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE riscv_can_eliminate
@@ -12409,6 +13076,9 @@ riscv_stack_clash_protection_alloca_probe_range (void)
#undef TARGET_C_MODE_FOR_FLOATING_TYPE
#define TARGET_C_MODE_FOR_FLOATING_TYPE riscv_c_mode_for_floating_type
+#undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
+#define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P riscv_use_by_pieces_infrastructure_p
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-riscv.h"
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index ead9786..ca1b832 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -316,7 +316,7 @@ ASM_MISA_SPEC
#define FIRST_PSEUDO_REGISTER 128
-/* x0, sp, gp, and tp are fixed. */
+/* x0, ra, sp, gp, and tp are fixed. */
#define FIXED_REGISTERS \
{ /* General registers. */ \
@@ -667,6 +667,18 @@ enum reg_class
/* True if bit BIT is set in VALUE. */
#define BITSET_P(VALUE, BIT) (((VALUE) & (1ULL << (BIT))) != 0)
+/* Returns the smaller (common) number of trailing zeros for VAL1 and VAL2. */
+#define COMMON_TRAILING_ZEROS(VAL1, VAL2) \
+ (ctz_hwi (VAL1) < ctz_hwi (VAL2) \
+ ? ctz_hwi (VAL1) \
+ : ctz_hwi (VAL2))
+
+/* Returns true if both VAL1 and VAL2 are SMALL_OPERANDs after shifting by
+ the common number of trailing zeros. */
+#define SMALL_AFTER_COMMON_TRAILING_SHIFT(VAL1, VAL2) \
+ (SMALL_OPERAND ((VAL1) >> COMMON_TRAILING_ZEROS (VAL1, VAL2)) \
+ && SMALL_OPERAND ((VAL2) >> COMMON_TRAILING_ZEROS (VAL1, VAL2)))
+
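A quick numeric check of these macros; this is a host-side sketch that uses __builtin_ctzll in place of ctz_hwi and 2047 as the positive simm12 bound tested by SMALL_OPERAND.

#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint64_t val1 = 0x11000, val2 = 0x1000;
  int common = __builtin_ctzll (val1) < __builtin_ctzll (val2)
	       ? __builtin_ctzll (val1) : __builtin_ctzll (val2);
  assert (common == 12);                                /* Shared trailing zeros.  */
  assert (val1 > 2047 && val2 > 2047);                  /* Neither is a SMALL_OPERAND.  */
  assert ((val1 >> common) <= 2047 && (val2 >> common) <= 2047); /* Both fit after the shift.  */
  return 0;
}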
/* Stack layout; function entry, exit and calling. */
#define STACK_GROWS_DOWNWARD 1
@@ -939,8 +951,6 @@ extern enum riscv_cc get_riscv_cc (const rtx use);
#define TARGET_VECTOR_MISALIGN_SUPPORTED \
riscv_vector_unaligned_access_p
-#define LOGICAL_OP_NON_SHORT_CIRCUIT 0
-
/* Control the assembler format that we output. */
/* Output to assembler file text saying following lines
diff --git a/gcc/config/riscv/riscv.md b/gcc/config/riscv/riscv.md
index a94705a..5b7b735 100644
--- a/gcc/config/riscv/riscv.md
+++ b/gcc/config/riscv/riscv.md
@@ -56,6 +56,8 @@
UNSPEC_FLT_QUIET
UNSPEC_FLE_QUIET
UNSPEC_COPYSIGN
+ UNSPEC_FMV_X_W
+ UNSPEC_FMVH_X_D
UNSPEC_RINT
UNSPEC_ROUND
UNSPEC_FLOOR
@@ -1811,7 +1813,15 @@
(define_expand "zero_extendsidi2"
[(set (match_operand:DI 0 "register_operand")
(zero_extend:DI (match_operand:SI 1 "nonimmediate_operand")))]
- "TARGET_64BIT")
+ "TARGET_64BIT"
+{
+ if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
+ && SUBREG_PROMOTED_UNSIGNED_P (operands[1]))
+ {
+ emit_insn (gen_movdi (operands[0], SUBREG_REG (operands[1])));
+ DONE;
+ }
+})
(define_insn_and_split "*zero_extendsidi2_internal"
[(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -1892,7 +1902,15 @@
[(set (match_operand:DI 0 "register_operand" "=r,r")
(sign_extend:DI
(match_operand:SI 1 "nonimmediate_operand" " r,m")))]
- "TARGET_64BIT")
+ "TARGET_64BIT"
+{
+ if (SUBREG_P (operands[1]) && SUBREG_PROMOTED_VAR_P (operands[1])
+ && SUBREG_PROMOTED_SIGNED_P (operands[1]))
+ {
+ emit_insn (gen_movdi (operands[0], SUBREG_REG (operands[1])));
+ DONE;
+ }
+})
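
A function where this matters might look like the following; whether the extension really disappears depends on the sCC expansion marking its SImode lowpart as promoted, so the codegen claim is an assumption.

/* On rv64, A < B is computed in DImode by riscv_expand_int_scc and its
   SImode lowpart is marked SUBREG_PROMOTED/SRP_SIGNED, so widening the
   result back to DImode here should fold into a plain move instead of an
   explicit sign extension (assumed codegen).  */
long
less (int a, int b)
{
  return a < b;
}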
(define_insn "*extendsidi2_internal"
[(set (match_operand:DI 0 "register_operand" "=r,r")
@@ -2327,17 +2345,16 @@
(define_insn "@tlsdesc<mode>"
[(set (reg:P A0_REGNUM)
- (unspec:P
- [(match_operand:P 0 "symbolic_operand" "")
- (match_operand:P 1 "const_int_operand")]
- UNSPEC_TLSDESC))
+ (unspec:P
+ [(match_operand:P 0 "symbolic_operand" "")]
+ UNSPEC_TLSDESC))
(clobber (reg:P T0_REGNUM))]
"TARGET_TLSDESC"
{
- return ".LT%1: auipc\ta0,%%tlsdesc_hi(%0)\;"
- "<load>\tt0,%%tlsdesc_load_lo(.LT%1)(a0)\;"
- "addi\ta0,a0,%%tlsdesc_add_lo(.LT%1)\;"
- "jalr\tt0,t0,%%tlsdesc_call(.LT%1)";
+ return ".LT%=: auipc\ta0,%%tlsdesc_hi(%0)\;"
+ "<load>\tt0,%%tlsdesc_load_lo(.LT%=)(a0)\;"
+ "addi\ta0,a0,%%tlsdesc_add_lo(.LT%=)\;"
+ "jalr\tt0,t0,%%tlsdesc_call(.LT%=)";
}
[(set_attr "type" "multi")
(set_attr "length" "16")
@@ -2627,8 +2644,9 @@
(define_insn "movsidf2_low_rv32"
[(set (match_operand:SI 0 "register_operand" "= r")
- (truncate:SI
- (match_operand:DF 1 "register_operand" "zmvf")))]
+ (unspec:SI
+ [(match_operand:DF 1 "register_operand" "zmvf")]
+ UNSPEC_FMV_X_W))]
"TARGET_HARD_FLOAT && !TARGET_64BIT && TARGET_ZFA"
"fmv.x.w\t%0,%1"
[(set_attr "move_type" "fmove")
@@ -2637,11 +2655,10 @@
(define_insn "movsidf2_high_rv32"
- [(set (match_operand:SI 0 "register_operand" "= r")
- (truncate:SI
- (lshiftrt:DF
- (match_operand:DF 1 "register_operand" "zmvf")
- (const_int 32))))]
+ [(set (match_operand:SI 0 "register_operand" "= r")
+ (unspec:SI
+ [(match_operand:DF 1 "register_operand" "zmvf")]
+ UNSPEC_FMVH_X_D))]
"TARGET_HARD_FLOAT && !TARGET_64BIT && TARGET_ZFA"
"fmvh.x.d\t%0,%1"
[(set_attr "move_type" "fmove")
@@ -2744,12 +2761,6 @@
FAIL;
})
-;; Inlining general memmove is a pessimisation: we can't avoid having to decide
-;; which direction to go at runtime, which is costly in instruction count
-;; however for situations where the entire move fits in one vector operation
-;; we can do all reads before doing any writes so we don't have to worry
-;; so generate the inline vector code in such situations
-;; nb. prefer scalar path for tiny memmoves.
(define_expand "movmem<mode>"
[(parallel [(set (match_operand:BLK 0 "general_operand")
(match_operand:BLK 1 "general_operand"))
@@ -2757,10 +2768,8 @@
(use (match_operand:SI 3 "const_int_operand"))])]
"TARGET_VECTOR"
{
- if ((INTVAL (operands[2]) >= TARGET_MIN_VLEN / 8)
- && (INTVAL (operands[2]) <= TARGET_MIN_VLEN)
- && riscv_vector::expand_block_move (operands[0], operands[1],
- operands[2]))
+ if (riscv_vector::expand_block_move (operands[0], operands[1], operands[2],
+ true))
DONE;
else
FAIL;
@@ -2925,7 +2934,9 @@
;; for IOR/XOR. It probably doesn't matter for AND.
;;
;; We also don't want to do this if the immediate already fits in a simm12
-;; field.
+;; field, or is a single bit operand, or when we might be able to generate
+;; a shift-add sequence via the splitter in bitmanip.md for masks that
+;; are a run of consecutive ones.
(define_insn_and_split "<optab>_shift_reverse<X:mode>"
[(set (match_operand:X 0 "register_operand" "=r")
(any_bitwise:X (ashift:X (match_operand:X 1 "register_operand" "r")
@@ -2934,9 +2945,9 @@
"(!SMALL_OPERAND (INTVAL (operands[3]))
&& SMALL_OPERAND (INTVAL (operands[3]) >> INTVAL (operands[2]))
&& popcount_hwi (INTVAL (operands[3])) > 1
- && (!TARGET_64BIT
- || (exact_log2 ((INTVAL (operands[3]) >> INTVAL (operands[2])) + 1)
- == -1))
+ && (!(TARGET_64BIT && TARGET_ZBA)
+ || !consecutive_bits_operand (operands[3], VOIDmode)
+ || !imm123_operand (operands[2], VOIDmode))
&& (INTVAL (operands[3]) & ((1ULL << INTVAL (operands[2])) - 1)) == 0)"
"#"
"&& 1"
@@ -3126,6 +3137,38 @@
}
[(set_attr "type" "branch")])
+(define_insn_and_split "*branch<ANYI:mode>_shiftedarith_<optab>_shifted"
+ [(set (pc)
+ (if_then_else (any_eq
+ (and:ANYI (match_operand:ANYI 1 "register_operand" "r")
+ (match_operand 2 "shifted_const_arith_operand" "i"))
+ (match_operand 3 "shifted_const_arith_operand" "i"))
+ (label_ref (match_operand 0 "" ""))
+ (pc)))
+ (clobber (match_scratch:X 4 "=&r"))
+ (clobber (match_scratch:X 5 "=&r"))]
+ "!SMALL_OPERAND (INTVAL (operands[2]))
+ && !SMALL_OPERAND (INTVAL (operands[3]))
+ && SMALL_AFTER_COMMON_TRAILING_SHIFT (INTVAL (operands[2]),
+ INTVAL (operands[3]))"
+ "#"
+ "&& reload_completed"
+ [(set (match_dup 4) (lshiftrt:X (match_dup 1) (match_dup 7)))
+ (set (match_dup 4) (and:X (match_dup 4) (match_dup 8)))
+ (set (match_dup 5) (match_dup 9))
+ (set (pc) (if_then_else (any_eq (match_dup 4) (match_dup 5))
+ (label_ref (match_dup 0)) (pc)))]
+{
+ HOST_WIDE_INT mask1 = INTVAL (operands[2]);
+ HOST_WIDE_INT mask2 = INTVAL (operands[3]);
+ int trailing_shift = COMMON_TRAILING_ZEROS (mask1, mask2);
+
+ operands[7] = GEN_INT (trailing_shift);
+ operands[8] = GEN_INT (mask1 >> trailing_shift);
+ operands[9] = GEN_INT (mask2 >> trailing_shift);
+}
+[(set_attr "type" "branch")])
+
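A source-level shape that can reach this pattern; the exact RTL and output depend on earlier passes, so the assembly here is an assumption.

/* Neither 0x12000 nor 0x2000 fits in a simm12, but they share 13 trailing
   zeros; after the common shift the masks become 0x9 and 0x1, so the split
   can emit roughly srli/andi/li/beq instead of materializing two wide
   constants (assumed codegen).  */
extern void g (unsigned long);

void
f (unsigned long x)
{
  if ((x & 0x12000UL) == 0x2000UL)
    g (x);
}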
(define_insn_and_split "*branch<ANYI:mode>_shiftedmask_equals_zero"
[(set (pc)
(if_then_else (match_operator 1 "equality_operator"
@@ -4358,11 +4401,22 @@
(define_expand "usadd<mode>3"
[(match_operand:ANYI 0 "register_operand")
+ (match_operand:ANYI 1 "reg_or_int_operand")
+ (match_operand:ANYI 2 "reg_or_int_operand")]
+ ""
+ {
+ riscv_expand_usadd (operands[0], operands[1], operands[2]);
+ DONE;
+ }
+)
+
+(define_expand "ssadd<mode>3"
+ [(match_operand:ANYI 0 "register_operand")
(match_operand:ANYI 1 "register_operand")
(match_operand:ANYI 2 "register_operand")]
""
{
- riscv_expand_usadd (operands[0], operands[1], operands[2]);
+ riscv_expand_ssadd (operands[0], operands[1], operands[2]);
DONE;
}
)
@@ -4370,7 +4424,7 @@
(define_expand "ussub<mode>3"
[(match_operand:ANYI 0 "register_operand")
(match_operand:ANYI 1 "reg_or_int_operand")
- (match_operand:ANYI 2 "register_operand")]
+ (match_operand:ANYI 2 "reg_or_int_operand")]
""
{
riscv_expand_ussub (operands[0], operands[1], operands[2]);
@@ -4378,6 +4432,17 @@
}
)
+(define_expand "sssub<mode>3"
+ [(match_operand:ANYI 0 "register_operand")
+ (match_operand:ANYI 1 "register_operand")
+ (match_operand:ANYI 2 "register_operand")]
+ ""
+ {
+ riscv_expand_sssub (operands[0], operands[1], operands[2]);
+ DONE;
+ }
+)
+
(define_expand "ustrunc<mode><anyi_double_truncated>2"
[(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
(match_operand:ANYI_DOUBLE_TRUNC 1 "register_operand")]
@@ -4388,6 +4453,16 @@
}
)
+(define_expand "sstrunc<mode><anyi_double_truncated>2"
+ [(match_operand:<ANYI_DOUBLE_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_DOUBLE_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_sstrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
(define_expand "ustrunc<mode><anyi_quad_truncated>2"
[(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
(match_operand:ANYI_QUAD_TRUNC 1 "register_operand")]
@@ -4398,6 +4473,16 @@
}
)
+(define_expand "sstrunc<mode><anyi_quad_truncated>2"
+ [(match_operand:<ANYI_QUAD_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_QUAD_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_sstrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
(define_expand "ustrunc<mode><anyi_oct_truncated>2"
[(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
(match_operand:ANYI_OCT_TRUNC 1 "register_operand")]
@@ -4408,6 +4493,16 @@
}
)
+(define_expand "sstrunc<mode><anyi_oct_truncated>2"
+ [(match_operand:<ANYI_OCT_TRUNCATED> 0 "register_operand")
+ (match_operand:ANYI_OCT_TRUNC 1 "register_operand")]
+ ""
+ {
+ riscv_expand_sstrunc (operands[0], operands[1]);
+ DONE;
+ }
+)
+
;; These are forms of (x << C1) + C2, potentially canonicalized from
;; ((x + C2') << C1. Depending on the cost to load C2 vs C2' we may
;; want to go ahead and recognize this form as C2 may be cheaper to
diff --git a/gcc/config/riscv/riscv.opt b/gcc/config/riscv/riscv.opt
index a8758ab..5bc5d30 100644
--- a/gcc/config/riscv/riscv.opt
+++ b/gcc/config/riscv/riscv.opt
@@ -34,8 +34,8 @@ Target RejectNegative Joined UInteger Var(riscv_branch_cost)
-mbranch-cost=N Set the cost of branches to roughly N instructions.
mplt
-Target Var(TARGET_PLT) Init(1)
-When generating -fpic code, allow the use of PLTs. Ignored for fno-pic.
+Target Alias(fplt)
+This option is deprecated; use -fplt or -fno-plt instead.
mabi=
Target RejectNegative Joined Enum(abi_type) Var(riscv_abi) Init(ABI_ILP32) Negative(mabi=)
@@ -658,3 +658,7 @@ Specify TLS dialect.
mfence-tso
Target Var(TARGET_FENCE_TSO) Init(1)
Specifies whether the fence.tso instruction should be used.
+
+mautovec-segment
+Target Integer Var(riscv_mautovec_segment) Init(1)
+Enable (default) or disable generation of vector segment load/store instructions.
diff --git a/gcc/config/riscv/riscv_cmo.h b/gcc/config/riscv/riscv_cmo.h
new file mode 100644
index 0000000..3514fd3
--- /dev/null
+++ b/gcc/config/riscv/riscv_cmo.h
@@ -0,0 +1,84 @@
+/* RISC-V CMO Extension intrinsics include file.
+ Copyright (C) 2024 Free Software Foundation, Inc.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef __RISCV_CMO_H
+#define __RISCV_CMO_H
+
+#if defined (__riscv_zicbom)
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_clean (void *addr)
+{
+ __builtin_riscv_zicbom_cbo_clean (addr);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_flush (void *addr)
+{
+ __builtin_riscv_zicbom_cbo_flush (addr);
+}
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_inval (void *addr)
+{
+ __builtin_riscv_zicbom_cbo_inval (addr);
+}
+
+#endif // __riscv_zicbom
+
+#if defined (__riscv_zicbop)
+
+# define rnum 1
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_prefetch (void *addr, const int vs1, const int vs2)
+{
+ __builtin_prefetch (addr, vs1, vs2);
+}
+
+extern __inline int
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_prefetchi ()
+{
+ return __builtin_riscv_zicbop_cbo_prefetchi (rnum);
+}
+
+#endif // __riscv_zicbop
+
+#if defined (__riscv_zicboz)
+
+extern __inline void
+__attribute__ ((__gnu_inline__, __always_inline__, __artificial__))
+__riscv_cmo_zero (void *addr)
+{
+ __builtin_riscv_zicboz_cbo_zero (addr);
+}
+
+#endif // __riscv_zicboz
+
+#endif // __RISCV_CMO_H
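
Typical use of the new header might look like this; the sketch assumes the header is installed as <riscv_cmo.h> and that the relevant extensions are enabled via -march so the feature-test macros are defined.

#include <riscv_cmo.h>

/* Flush one cache block back to memory and zero another one.  The guards
   mirror the feature tests inside the header itself.  */
void
sync_blocks (void *dirty, void *scratch)
{
#if defined (__riscv_zicbom)
  __riscv_cmo_flush (dirty);
#endif
#if defined (__riscv_zicboz)
  __riscv_cmo_zero (scratch);
#endif
}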
diff --git a/gcc/config/riscv/thead.cc b/gcc/config/riscv/thead.cc
index 2f1d83f..707d910 100644
--- a/gcc/config/riscv/thead.cc
+++ b/gcc/config/riscv/thead.cc
@@ -960,11 +960,11 @@ th_asm_output_opcode (FILE *asm_out_file, const char *p)
if (strstr (p, "zero,zero"))
return "th.vsetvli\tzero,zero,e%0,%m1";
else
- return "th.vsetvli\tzero,%0,e%1,%m2";
+ return "th.vsetvli\tzero,%z0,e%1,%m2";
}
else
{
- return "th.vsetvli\t%0,%1,e%2,%m3";
+ return "th.vsetvli\t%z0,%z1,e%2,%m3";
}
}
diff --git a/gcc/config/riscv/thead.md b/gcc/config/riscv/thead.md
index 2a3af76..7a76cc8 100644
--- a/gcc/config/riscv/thead.md
+++ b/gcc/config/riscv/thead.md
@@ -85,7 +85,9 @@
(zero_extract:GPR (match_operand:GPR 1 "register_operand" "r")
(match_operand 2 "const_int_operand")
(match_operand 3 "const_int_operand")))]
- "TARGET_XTHEADBB"
+ "TARGET_XTHEADBB
+ && (UINTVAL (operands[2]) + UINTVAL (operands[3])
+ <= GET_MODE_BITSIZE (<MODE>mode))"
{
operands[2] = GEN_INT (INTVAL (operands[2]) + INTVAL (operands[3]) - 1);
return "th.extu\t%0,%1,%2,%3";
diff --git a/gcc/config/riscv/vector-crypto.md b/gcc/config/riscv/vector-crypto.md
index db372be..db372be 100755..100644
--- a/gcc/config/riscv/vector-crypto.md
+++ b/gcc/config/riscv/vector-crypto.md
diff --git a/gcc/config/riscv/vector-iterators.md b/gcc/config/riscv/vector-iterators.md
index cbbd248..43325d1 100644
--- a/gcc/config/riscv/vector-iterators.md
+++ b/gcc/config/riscv/vector-iterators.md
@@ -4126,3 +4126,389 @@
(define_mode_attr VSIX16 [
(RVVMF2SI "RVVM8SI")
])
+
+(define_mode_iterator VLS_HAS_HALF [
+ (V2QI "riscv_vector::vls_mode_valid_p (V2QImode)")
+ (V4QI "riscv_vector::vls_mode_valid_p (V4QImode)")
+ (V8QI "riscv_vector::vls_mode_valid_p (V8QImode)")
+ (V16QI "riscv_vector::vls_mode_valid_p (V16QImode)")
+ (V2HI "riscv_vector::vls_mode_valid_p (V2HImode)")
+ (V4HI "riscv_vector::vls_mode_valid_p (V4HImode)")
+ (V8HI "riscv_vector::vls_mode_valid_p (V8HImode)")
+ (V16HI "riscv_vector::vls_mode_valid_p (V16HImode)")
+ (V2SI "riscv_vector::vls_mode_valid_p (V2SImode)")
+ (V4SI "riscv_vector::vls_mode_valid_p (V4SImode)")
+ (V8SI "riscv_vector::vls_mode_valid_p (V8SImode)")
+ (V16SI "riscv_vector::vls_mode_valid_p (V16SImode) && TARGET_MIN_VLEN >= 64")
+ (V2DI "riscv_vector::vls_mode_valid_p (V2DImode) && TARGET_VECTOR_ELEN_64")
+ (V4DI "riscv_vector::vls_mode_valid_p (V4DImode) && TARGET_VECTOR_ELEN_64")
+ (V8DI "riscv_vector::vls_mode_valid_p (V8DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 64")
+ (V16DI "riscv_vector::vls_mode_valid_p (V16DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
+ (V2SF "riscv_vector::vls_mode_valid_p (V2SFmode) && TARGET_VECTOR_ELEN_FP_32")
+ (V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32")
+ (V8SF "riscv_vector::vls_mode_valid_p (V8SFmode) && TARGET_VECTOR_ELEN_FP_32")
+ (V16SF "riscv_vector::vls_mode_valid_p (V16SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64")
+ (V2DF "riscv_vector::vls_mode_valid_p (V2DFmode) && TARGET_VECTOR_ELEN_FP_64")
+ (V4DF "riscv_vector::vls_mode_valid_p (V4DFmode) && TARGET_VECTOR_ELEN_FP_64")
+ (V8DF "riscv_vector::vls_mode_valid_p (V8DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 64")
+ (V16DF "riscv_vector::vls_mode_valid_p (V16DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+ (V32QI "riscv_vector::vls_mode_valid_p (V32QImode)")
+ (V64QI "riscv_vector::vls_mode_valid_p (V64QImode) && TARGET_MIN_VLEN >= 64")
+ (V128QI "riscv_vector::vls_mode_valid_p (V128QImode) && TARGET_MIN_VLEN >= 128")
+ (V256QI "riscv_vector::vls_mode_valid_p (V256QImode) && TARGET_MIN_VLEN >= 256")
+ (V512QI "riscv_vector::vls_mode_valid_p (V512QImode) && TARGET_MIN_VLEN >= 512")
+ (V1024QI "riscv_vector::vls_mode_valid_p (V1024QImode) && TARGET_MIN_VLEN >= 1024")
+ (V2048QI "riscv_vector::vls_mode_valid_p (V2048QImode) && TARGET_MIN_VLEN >= 2048")
+ (V4096QI "riscv_vector::vls_mode_valid_p (V4096QImode) && TARGET_MIN_VLEN >= 4096")
+ (V32HI "riscv_vector::vls_mode_valid_p (V32HImode) && TARGET_MIN_VLEN >= 64")
+ (V64HI "riscv_vector::vls_mode_valid_p (V64HImode) && TARGET_MIN_VLEN >= 128")
+ (V128HI "riscv_vector::vls_mode_valid_p (V128HImode) && TARGET_MIN_VLEN >= 256")
+ (V256HI "riscv_vector::vls_mode_valid_p (V256HImode) && TARGET_MIN_VLEN >= 512")
+ (V512HI "riscv_vector::vls_mode_valid_p (V512HImode) && TARGET_MIN_VLEN >= 1024")
+ (V1024HI "riscv_vector::vls_mode_valid_p (V1024HImode) && TARGET_MIN_VLEN >= 2048")
+ (V2048HI "riscv_vector::vls_mode_valid_p (V2048HImode) && TARGET_MIN_VLEN >= 4096")
+ (V32SI "riscv_vector::vls_mode_valid_p (V32SImode) && TARGET_MIN_VLEN >= 128")
+ (V64SI "riscv_vector::vls_mode_valid_p (V64SImode) && TARGET_MIN_VLEN >= 256")
+ (V128SI "riscv_vector::vls_mode_valid_p (V128SImode) && TARGET_MIN_VLEN >= 512")
+ (V256SI "riscv_vector::vls_mode_valid_p (V256SImode) && TARGET_MIN_VLEN >= 1024")
+ (V512SI "riscv_vector::vls_mode_valid_p (V512SImode) && TARGET_MIN_VLEN >= 2048")
+ (V1024SI "riscv_vector::vls_mode_valid_p (V1024SImode) && TARGET_MIN_VLEN >= 4096")
+ (V32DI "riscv_vector::vls_mode_valid_p (V32DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 256")
+ (V64DI "riscv_vector::vls_mode_valid_p (V64DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 512")
+ (V128DI "riscv_vector::vls_mode_valid_p (V128DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 1024")
+ (V256DI "riscv_vector::vls_mode_valid_p (V256DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 2048")
+ (V512DI "riscv_vector::vls_mode_valid_p (V512DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 4096")
+ (V32SF "riscv_vector::vls_mode_valid_p (V32SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+ (V64SF "riscv_vector::vls_mode_valid_p (V64SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 256")
+ (V128SF "riscv_vector::vls_mode_valid_p (V128SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 512")
+ (V256SF "riscv_vector::vls_mode_valid_p (V256SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 1024")
+ (V512SF "riscv_vector::vls_mode_valid_p (V512SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 2048")
+ (V1024SF "riscv_vector::vls_mode_valid_p (V1024SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 4096")
+ (V32DF "riscv_vector::vls_mode_valid_p (V32DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 256")
+ (V64DF "riscv_vector::vls_mode_valid_p (V64DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 512")
+ (V128DF "riscv_vector::vls_mode_valid_p (V128DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 1024")
+ (V256DF "riscv_vector::vls_mode_valid_p (V256DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 2048")
+ (V512DF "riscv_vector::vls_mode_valid_p (V512DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 4096")
+])
+
+(define_mode_attr VLS_HALF [
+ (V2QI "V1QI")
+ (V4QI "V2QI")
+ (V8QI "V4QI")
+ (V16QI "V8QI")
+ (V32QI "V16QI")
+ (V64QI "V32QI")
+ (V128QI "V64QI")
+ (V256QI "V128QI")
+ (V512QI "V256QI")
+ (V1024QI "V512QI")
+ (V2048QI "V1024QI")
+ (V4096QI "V2048QI")
+
+ (V2HI "V1HI")
+ (V4HI "V2HI")
+ (V8HI "V4HI")
+ (V16HI "V8HI")
+ (V32HI "V16HI")
+ (V64HI "V32HI")
+ (V128HI "V64HI")
+ (V256HI "V128HI")
+ (V512HI "V256HI")
+ (V1024HI "V512HI")
+ (V2048HI "V1024HI")
+
+ (V2SI "V1SI")
+ (V4SI "V2SI")
+ (V8SI "V4SI")
+ (V16SI "V8SI")
+ (V32SI "V16SI")
+ (V64SI "V32SI")
+ (V128SI "V64SI")
+ (V256SI "V128SI")
+ (V512SI "V256SI")
+ (V1024SI "V512SI")
+
+ (V2DI "V1DI")
+ (V4DI "V2DI")
+ (V8DI "V4DI")
+ (V16DI "V8DI")
+ (V32DI "V16DI")
+ (V64DI "V32DI")
+ (V128DI "V64DI")
+ (V256DI "V128DI")
+ (V512DI "V256DI")
+
+ (V2SF "V1SF")
+ (V4SF "V2SF")
+ (V8SF "V4SF")
+ (V16SF "V8SF")
+ (V32SF "V16SF")
+ (V64SF "V32SF")
+ (V128SF "V64SF")
+ (V256SF "V128SF")
+ (V512SF "V256SF")
+ (V1024SF "V512SF")
+
+ (V2DF "V1DF")
+ (V4DF "V2DF")
+ (V8DF "V4DF")
+ (V16DF "V8DF")
+ (V32DF "V16DF")
+ (V64DF "V32DF")
+ (V128DF "V64DF")
+ (V256DF "V128DF")
+ (V512DF "V256DF")
+])
+
+(define_mode_attr vls_half [
+ (V2QI "v1qi")
+ (V4QI "v2qi")
+ (V8QI "v4qi")
+ (V16QI "v8qi")
+ (V32QI "v16qi")
+ (V64QI "v32qi")
+ (V128QI "v64qi")
+ (V256QI "v128qi")
+ (V512QI "v256qi")
+ (V1024QI "v512qi")
+ (V2048QI "v1024qi")
+ (V4096QI "v2048qi")
+
+ (V2HI "v1hi")
+ (V4HI "v2hi")
+ (V8HI "v4hi")
+ (V16HI "v8hi")
+ (V32HI "v16hi")
+ (V64HI "v32hi")
+ (V128HI "v64hi")
+ (V256HI "v128hi")
+ (V512HI "v256hi")
+ (V1024HI "v512hi")
+ (V2048HI "v1024hi")
+
+ (V2SI "v1si")
+ (V4SI "v2si")
+ (V8SI "v4si")
+ (V16SI "v8si")
+ (V32SI "v16si")
+ (V64SI "v32si")
+ (V128SI "v64si")
+ (V256SI "v128si")
+ (V512SI "v256si")
+ (V1024SI "v512si")
+
+ (V2DI "v1di")
+ (V4DI "v2di")
+ (V8DI "v4di")
+ (V16DI "v8di")
+ (V32DI "v16di")
+ (V64DI "v32di")
+ (V128DI "v64di")
+ (V256DI "v128di")
+ (V512DI "v256di")
+
+ (V2SF "v1sf")
+ (V4SF "v2sf")
+ (V8SF "v4sf")
+ (V16SF "v8sf")
+ (V32SF "v16sf")
+ (V64SF "v32sf")
+ (V128SF "v64sf")
+ (V256SF "v128sf")
+ (V512SF "v256sf")
+ (V1024SF "v512sf")
+
+ (V2DF "v1df")
+ (V4DF "v2df")
+ (V8DF "v4df")
+ (V16DF "v8df")
+ (V32DF "v16df")
+ (V64DF "v32df")
+ (V128DF "v64df")
+ (V256DF "v128df")
+ (V512DF "v256df")
+])
+
+(define_mode_iterator VLS_HAS_QUARTER [
+ (V4QI "riscv_vector::vls_mode_valid_p (V4QImode)")
+ (V8QI "riscv_vector::vls_mode_valid_p (V8QImode)")
+ (V16QI "riscv_vector::vls_mode_valid_p (V16QImode)")
+ (V4HI "riscv_vector::vls_mode_valid_p (V4HImode)")
+ (V8HI "riscv_vector::vls_mode_valid_p (V8HImode)")
+ (V16HI "riscv_vector::vls_mode_valid_p (V16HImode)")
+ (V4SI "riscv_vector::vls_mode_valid_p (V4SImode)")
+ (V8SI "riscv_vector::vls_mode_valid_p (V8SImode)")
+ (V16SI "riscv_vector::vls_mode_valid_p (V16SImode) && TARGET_MIN_VLEN >= 64")
+ (V4DI "riscv_vector::vls_mode_valid_p (V4DImode) && TARGET_VECTOR_ELEN_64")
+ (V8DI "riscv_vector::vls_mode_valid_p (V8DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 64")
+ (V16DI "riscv_vector::vls_mode_valid_p (V16DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 128")
+ (V4SF "riscv_vector::vls_mode_valid_p (V4SFmode) && TARGET_VECTOR_ELEN_FP_32")
+ (V8SF "riscv_vector::vls_mode_valid_p (V8SFmode) && TARGET_VECTOR_ELEN_FP_32")
+ (V16SF "riscv_vector::vls_mode_valid_p (V16SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 64")
+ (V4DF "riscv_vector::vls_mode_valid_p (V4DFmode) && TARGET_VECTOR_ELEN_FP_64")
+ (V8DF "riscv_vector::vls_mode_valid_p (V8DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 64")
+ (V16DF "riscv_vector::vls_mode_valid_p (V16DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 128")
+ (V32QI "riscv_vector::vls_mode_valid_p (V32QImode)")
+ (V64QI "riscv_vector::vls_mode_valid_p (V64QImode) && TARGET_MIN_VLEN >= 64")
+ (V128QI "riscv_vector::vls_mode_valid_p (V128QImode) && TARGET_MIN_VLEN >= 128")
+ (V256QI "riscv_vector::vls_mode_valid_p (V256QImode) && TARGET_MIN_VLEN >= 256")
+ (V512QI "riscv_vector::vls_mode_valid_p (V512QImode) && TARGET_MIN_VLEN >= 512")
+ (V1024QI "riscv_vector::vls_mode_valid_p (V1024QImode) && TARGET_MIN_VLEN >= 1024")
+ (V2048QI "riscv_vector::vls_mode_valid_p (V2048QImode) && TARGET_MIN_VLEN >= 2048")
+ (V4096QI "riscv_vector::vls_mode_valid_p (V4096QImode) && TARGET_MIN_VLEN >= 4096")
+ (V32HI "riscv_vector::vls_mode_valid_p (V32HImode) && TARGET_MIN_VLEN >= 64")
+ (V64HI "riscv_vector::vls_mode_valid_p (V64HImode) && TARGET_MIN_VLEN >= 128")
+ (V128HI "riscv_vector::vls_mode_valid_p (V128HImode) && TARGET_MIN_VLEN >= 256")
+ (V256HI "riscv_vector::vls_mode_valid_p (V256HImode) && TARGET_MIN_VLEN >= 512")
+ (V512HI "riscv_vector::vls_mode_valid_p (V512HImode) && TARGET_MIN_VLEN >= 1024")
+ (V1024HI "riscv_vector::vls_mode_valid_p (V1024HImode) && TARGET_MIN_VLEN >= 2048")
+ (V2048HI "riscv_vector::vls_mode_valid_p (V2048HImode) && TARGET_MIN_VLEN >= 4096")
+ (V32SI "riscv_vector::vls_mode_valid_p (V32SImode) && TARGET_MIN_VLEN >= 128")
+ (V64SI "riscv_vector::vls_mode_valid_p (V64SImode) && TARGET_MIN_VLEN >= 256")
+ (V128SI "riscv_vector::vls_mode_valid_p (V128SImode) && TARGET_MIN_VLEN >= 512")
+ (V256SI "riscv_vector::vls_mode_valid_p (V256SImode) && TARGET_MIN_VLEN >= 1024")
+ (V512SI "riscv_vector::vls_mode_valid_p (V512SImode) && TARGET_MIN_VLEN >= 2048")
+ (V1024SI "riscv_vector::vls_mode_valid_p (V1024SImode) && TARGET_MIN_VLEN >= 4096")
+ (V32DI "riscv_vector::vls_mode_valid_p (V32DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 256")
+ (V64DI "riscv_vector::vls_mode_valid_p (V64DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 512")
+ (V128DI "riscv_vector::vls_mode_valid_p (V128DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 1024")
+ (V256DI "riscv_vector::vls_mode_valid_p (V256DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 2048")
+ (V512DI "riscv_vector::vls_mode_valid_p (V512DImode) && TARGET_VECTOR_ELEN_64 && TARGET_MIN_VLEN >= 4096")
+ (V32SF "riscv_vector::vls_mode_valid_p (V32SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 128")
+ (V64SF "riscv_vector::vls_mode_valid_p (V64SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 256")
+ (V128SF "riscv_vector::vls_mode_valid_p (V128SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 512")
+ (V256SF "riscv_vector::vls_mode_valid_p (V256SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 1024")
+ (V512SF "riscv_vector::vls_mode_valid_p (V512SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 2048")
+ (V1024SF "riscv_vector::vls_mode_valid_p (V1024SFmode) && TARGET_VECTOR_ELEN_FP_32 && TARGET_MIN_VLEN >= 4096")
+ (V32DF "riscv_vector::vls_mode_valid_p (V32DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 256")
+ (V64DF "riscv_vector::vls_mode_valid_p (V64DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 512")
+ (V128DF "riscv_vector::vls_mode_valid_p (V128DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 1024")
+ (V256DF "riscv_vector::vls_mode_valid_p (V256DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 2048")
+ (V512DF "riscv_vector::vls_mode_valid_p (V512DFmode) && TARGET_VECTOR_ELEN_FP_64 && TARGET_MIN_VLEN >= 4096")
+])
+
+(define_mode_attr VLS_QUARTER [
+ (V4QI "V1QI")
+ (V8QI "V2QI")
+ (V16QI "V4QI")
+ (V32QI "V8QI")
+ (V64QI "V16QI")
+ (V128QI "V32QI")
+ (V256QI "V64QI")
+ (V512QI "V128QI")
+ (V1024QI "V256QI")
+ (V2048QI "V512QI")
+ (V4096QI "V1024QI")
+
+ (V4HI "V1HI")
+ (V8HI "V2HI")
+ (V16HI "V4HI")
+ (V32HI "V8HI")
+ (V64HI "V16HI")
+ (V128HI "V32HI")
+ (V256HI "V64HI")
+ (V512HI "V128HI")
+ (V1024HI "V256HI")
+ (V2048HI "V512HI")
+
+ (V4SI "V1SI")
+ (V8SI "V2SI")
+ (V16SI "V4SI")
+ (V32SI "V8SI")
+ (V64SI "V16SI")
+ (V128SI "V32SI")
+ (V256SI "V64SI")
+ (V512SI "V128SI")
+ (V1024SI "V256SI")
+
+ (V4DI "V1DI")
+ (V8DI "V2DI")
+ (V16DI "V4DI")
+ (V32DI "V8DI")
+ (V64DI "V16DI")
+ (V128DI "V32DI")
+ (V256DI "V64DI")
+ (V512DI "V128DI")
+
+ (V4SF "V1SF")
+ (V8SF "V2SF")
+ (V16SF "V4SF")
+ (V32SF "V8SF")
+ (V64SF "V16SF")
+ (V128SF "V32SF")
+ (V256SF "V64SF")
+ (V512SF "V128SF")
+ (V1024SF "V256SF")
+
+ (V4DF "V1DF")
+ (V8DF "V2DF")
+ (V16DF "V4DF")
+ (V32DF "V8DF")
+ (V64DF "V16DF")
+ (V128DF "V32DF")
+ (V256DF "V64DF")
+ (V512DF "V128DF")
+])
+
+(define_mode_attr vls_quarter [
+ (V4QI "v1qi")
+ (V8QI "v2qi")
+ (V16QI "v4qi")
+ (V32QI "v8qi")
+ (V64QI "v16qi")
+ (V128QI "v32qi")
+ (V256QI "v64qi")
+ (V512QI "v128qi")
+ (V1024QI "v256qi")
+ (V2048QI "v512qi")
+ (V4096QI "v1024qi")
+
+ (V4HI "v1hi")
+ (V8HI "v2hi")
+ (V16HI "v4hi")
+ (V32HI "v8hi")
+ (V64HI "v16hi")
+ (V128HI "v32hi")
+ (V256HI "v64hi")
+ (V512HI "v128hi")
+ (V1024HI "v256hi")
+ (V2048HI "v512hi")
+
+ (V4SI "v1si")
+ (V8SI "v2si")
+ (V16SI "v4si")
+ (V32SI "v8si")
+ (V64SI "v16si")
+ (V128SI "v32si")
+ (V256SI "v64si")
+ (V512SI "v128si")
+ (V1024SI "v256si")
+
+ (V4DI "v1di")
+ (V8DI "v2di")
+ (V16DI "v4di")
+ (V32DI "v8di")
+ (V64DI "v16di")
+ (V128DI "v32di")
+ (V256DI "v64di")
+ (V512DI "v128di")
+
+ (V4SF "v1sf")
+ (V8SF "v2sf")
+ (V16SF "v4sf")
+ (V32SF "v8sf")
+ (V64SF "v16sf")
+ (V128SF "v32sf")
+ (V256SF "v64sf")
+ (V512SF "v128sf")
+ (V1024SF "v256sf")
+
+ (V4DF "v1df")
+ (V8DF "v2df")
+ (V16DF "v4df")
+ (V32DF "v8df")
+ (V64DF "v16df")
+ (V128DF "v32df")
+ (V256DF "v64df")
+ (V512DF "v128df")
+])
diff --git a/gcc/config/riscv/vector.md b/gcc/config/riscv/vector.md
index 6667193..a21288f 100644
--- a/gcc/config/riscv/vector.md
+++ b/gcc/config/riscv/vector.md
@@ -816,7 +816,7 @@
vfcmp,vfminmax,vfsgnj,vfclass,vfmerge,vfmov,\
vfcvtitof,vfncvtitof,vfncvtftoi,vfncvtftof,vmalu,vmiota,vmidx,\
vimovxv,vfmovfv,vslideup,vslidedown,vislide1up,vislide1down,vfslide1up,vfslide1down,\
- vgather,vcompress,vmov,vnclip,vnshift,vandn,vcpop,vclz,vctz")
+ vgather,vcompress,vmov,vnclip,vnshift,vandn,vcpop,vclz,vctz,vrol,vror")
(const_int 0)
(eq_attr "type" "vimovvx,vfmovvf")
@@ -2095,6 +2095,16 @@
emit_move_insn (tmp, gen_int_mode (value, Pmode));
operands[3] = gen_rtx_SIGN_EXTEND (<VEL>mode, tmp);
}
+ /* Never load (const_int 0) into a register; that's silly. */
+ else if (operands[3] == CONST0_RTX (<VEL>mode))
+ ;
+ /* If we're broadcasting [-16..15] across more than just
+ element 0, then we can use vmv.v.i directly, thus avoiding
+ the load of the constant into a GPR. */
+ else if (CONST_INT_P (operands[3])
+ && IN_RANGE (INTVAL (operands[3]), -16, 15)
+ && !satisfies_constraint_Wb1 (operands[1]))
+ ;
else
operands[3] = force_reg (<VEL>mode, operands[3]);
})
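
At the source level this corresponds to broadcasting a small immediate.  Illustrative only, assuming the <riscv_vector.h> intrinsics; the instruction selection depends on -march and the vector configuration.

#include <riscv_vector.h>

/* Broadcasting 5 (inside [-16, 15]) across all elements can now be emitted
   as a single vmv.v.i instead of li + vmv.v.x (assumed codegen).  */
vint32m1_t
splat5 (size_t vl)
{
  return __riscv_vmv_v_x_i32m1 (5, vl);
}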
@@ -2111,18 +2121,18 @@
(reg:SI VL_REGNUM)
(reg:SI VTYPE_REGNUM)] UNSPEC_VPREDICATE)
(vec_duplicate:V_VLSI
- (match_operand:<VEL> 3 "direct_broadcast_operand" " r, r,Wdm,Wdm,Wdm,Wdm, r, r"))
- (match_operand:V_VLSI 2 "vector_merge_operand" "vu, 0, vu, 0, vu, 0, vu, 0")))]
+ (match_operand:<VEL> 3 "direct_broadcast_operand" "rP,rP,Wdm,Wdm,Wdm,Wdm, rJ, rJ"))
+ (match_operand:V_VLSI 2 "vector_merge_operand" "vu, 0, vu, 0, vu, 0, vu, 0")))]
"TARGET_VECTOR"
"@
- vmv.v.x\t%0,%3
- vmv.v.x\t%0,%3
+ vmv.v.%o3\t%0,%3
+ vmv.v.%o3\t%0,%3
vlse<sew>.v\t%0,%3,zero,%1.t
vlse<sew>.v\t%0,%3,zero,%1.t
vlse<sew>.v\t%0,%3,zero
vlse<sew>.v\t%0,%3,zero
- vmv.s.x\t%0,%3
- vmv.s.x\t%0,%3"
+ vmv.s.x\t%0,%z3
+ vmv.s.x\t%0,%z3"
"(register_operand (operands[3], <VEL>mode)
|| CONST_POLY_INT_P (operands[3]))
&& GET_MODE_BITSIZE (<VEL>mode) > GET_MODE_BITSIZE (Pmode)"
@@ -4400,10 +4410,10 @@
(sat_int_minus_binop:VI_D
(match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr")
(vec_duplicate:VI_D
- (match_operand:<VEL> 4 "register_operand" " r, r, r, r")))
+ (match_operand:<VEL> 4 "reg_or_0_operand" " rJ, rJ, rJ, rJ")))
(match_operand:VI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR"
- "v<insn>.vx\t%0,%3,%4%p1"
+ "v<insn>.vx\t%0,%3,%z4%p1"
[(set_attr "type" "<int_binop_insn_type>")
(set_attr "mode" "<MODE>")])
@@ -4422,10 +4432,10 @@
(match_operand:VI_D 3 "register_operand" " vr, vr, vr, vr")
(vec_duplicate:VI_D
(sign_extend:<VEL>
- (match_operand:<VSUBEL> 4 "register_operand" " r, r, r, r"))))
+ (match_operand:<VSUBEL> 4 "reg_or_0_operand" " rJ, rJ, rJ, rJ"))))
(match_operand:VI_D 2 "vector_merge_operand" " vu, 0, vu, 0")))]
"TARGET_VECTOR && !TARGET_64BIT"
- "v<insn>.vx\t%0,%3,%4%p1"
+ "v<insn>.vx\t%0,%3,%z4%p1"
[(set_attr "type" "<int_binop_insn_type>")
(set_attr "mode" "<MODE>")])
diff --git a/gcc/config/riscv/xiangshan.md b/gcc/config/riscv/xiangshan.md
index 76539d3..eb83bbf 100644
--- a/gcc/config/riscv/xiangshan.md
+++ b/gcc/config/riscv/xiangshan.md
@@ -70,12 +70,17 @@
(define_insn_reservation "xiangshan_jump" 1
(and (eq_attr "tune" "xiangshan")
- (eq_attr "type" "jump,call,auipc,unknown,branch,jalr,ret,sfb_alu"))
+ (eq_attr "type" "jump,call,auipc,unknown,branch,jalr,ret,sfb_alu,trap"))
"xs_jmp_rs")
(define_insn_reservation "xiangshan_i2f" 3
(and (eq_attr "tune" "xiangshan")
- (eq_attr "type" "mtc"))
+ (eq_attr "type" "mtc,fcvt_i2f"))
+ "xs_jmp_rs")
+
+(define_insn_reservation "xiangshan_atomic" 1
+ (and (eq_attr "tune" "xiangshan")
+ (eq_attr "type" "atomic"))
"xs_jmp_rs")
(define_insn_reservation "xiangshan_mul" 3
@@ -115,7 +120,7 @@
(define_insn_reservation "xiangshan_f2f" 3
(and (eq_attr "tune" "xiangshan")
- (eq_attr "type" "fcvt,fmove"))
+ (eq_attr "type" "fcvt,fcvt_f2i,fmove"))
"xs_fmisc_rs")
(define_insn_reservation "xiangshan_f2i" 3
diff --git a/gcc/config/riscv/zc.md b/gcc/config/riscv/zc.md
index 5b948b4..6dc47da 100644
--- a/gcc/config/riscv/zc.md
+++ b/gcc/config/riscv/zc.md
@@ -1442,7 +1442,7 @@
(match_operand:X 3 "zcmp_mv_sreg_operand" "r"))]
"TARGET_ZCMP
&& (REGNO (operands[2]) != REGNO (operands[0]))"
- { return (REGNO (operands[0]) == A0_REGNUM)?"cm.mva01s\t%1,%3":"cm.mva01s\t%3,%1"; }
+ { return (REGNO (operands[0]) == A0_REGNUM) ? "cm.mva01s\t%1,%3" : "cm.mva01s\t%3,%1"; }
[(set_attr "mode" "<X:MODE>")
(set_attr "type" "mvpair")])
@@ -1454,6 +1454,6 @@
"TARGET_ZCMP
&& (REGNO (operands[0]) != REGNO (operands[2]))
&& (REGNO (operands[1]) != REGNO (operands[3]))"
- { return (REGNO (operands[1]) == A0_REGNUM)?"cm.mvsa01\t%0,%2":"cm.mvsa01\t%2,%0"; }
+ { return (REGNO (operands[1]) == A0_REGNUM) ? "cm.mvsa01\t%0,%2" : "cm.mvsa01\t%2,%0"; }
[(set_attr "mode" "<X:MODE>")
(set_attr "type" "mvpair")])
diff --git a/gcc/config/riscv/zicond.md b/gcc/config/riscv/zicond.md
index 3876be7..ab1a533 100644
--- a/gcc/config/riscv/zicond.md
+++ b/gcc/config/riscv/zicond.md
@@ -124,3 +124,115 @@
{
operands[2] = GEN_INT (1 << UINTVAL(operands[2]));
})
+
+;; In some cases gimple can give us a sequence with a logical AND
+;; of two sCC insns.  This can be implemented as an sCC feeding a
+;; conditional zero.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (ne:X (match_operand:X 1 "register_operand") (const_int 0))
+ (scc_0:X (match_operand:X 2 "register_operand")
+ (match_operand:X 3 "reg_or_0_operand"))))
+ (clobber (match_operand:X 4 "register_operand"))]
+ "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV"
+ [(set (match_dup 4) (scc_0:X (match_dup 2) (match_dup 3)))
+ (set (match_dup 0) (if_then_else:X (eq:X (match_dup 1) (const_int 0))
+ (const_int 0)
+ (match_dup 4)))])
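
As a hedged sketch of the source pattern this split (and the variants below) targets; the names are illustrative and the exact instruction selection depends on the enabled extensions:

/* Hypothetical C source whose gimple is a logical AND of two
   set-on-comparison results.  */
long
f (long a, long b, long c)
{
  /* With Zicond this can become roughly:
       slt        t0, b, c
       czero.eqz  a0, t0, a     # a0 = (a == 0) ? 0 : t0  */
  return (a != 0) & (b < c);
}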
+
+;; Similarly, but for GE/GEU, which require (const_int 1) as an operand.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (ne:X (match_operand:X 1 "register_operand") (const_int 0))
+ (any_ge:X (match_operand:X 2 "register_operand")
+ (const_int 1))))
+ (clobber (match_operand:X 3 "register_operand"))]
+ "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV"
+ [(set (match_dup 3) (any_ge:X (match_dup 2) (const_int 1)))
+ (set (match_dup 0) (if_then_else:X (eq:X (match_dup 1) (const_int 0))
+ (const_int 0)
+ (match_dup 3)))])
+
+;; Similarly, but for LT/LTU, which allow an arith_operand.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (ne:X (match_operand:X 1 "register_operand") (const_int 0))
+ (any_lt:X (match_operand:X 2 "register_operand")
+ (match_operand:X 3 "arith_operand"))))
+ (clobber (match_operand:X 4 "register_operand"))]
+ "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV"
+ [(set (match_dup 4) (any_lt:X (match_dup 2) (match_dup 3)))
+ (set (match_dup 0) (if_then_else:X (eq:X (match_dup 1) (const_int 0))
+ (const_int 0)
+ (match_dup 4)))])
+
+;; Finally, LE/LEU, which require an sle_operand.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (ne:X (match_operand:X 1 "register_operand") (const_int 0))
+ (any_le:X (match_operand:X 2 "register_operand")
+ (match_operand:X 3 "sle_operand"))))
+ (clobber (match_operand:X 4 "register_operand"))]
+ "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV"
+ [(set (match_dup 4) (any_le:X (match_dup 2) (match_dup 3)))
+ (set (match_dup 0) (if_then_else:X (eq:X (match_dup 1) (const_int 0))
+ (const_int 0)
+ (match_dup 4)))])
+
+
+;; Inverted versions of the patterns above.  I tried to get this to work with
+;; iterators, but didn't have any success disambiguating the code attr
+;; for the eq/ne flip we have to do.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (eq:X (match_operand:X 1 "register_operand") (const_int 0))
+ (scc_0:X (match_operand:X 2 "register_operand")
+ (match_operand:X 3 "reg_or_0_operand"))))
+ (clobber (match_operand:X 4 "register_operand"))]
+ "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV"
+ [(set (match_dup 4) (scc_0:X (match_dup 2) (match_dup 3)))
+ (set (match_dup 0) (if_then_else:X (ne:X (match_dup 1) (const_int 0))
+ (const_int 0)
+ (match_dup 4)))])
+
+;; Similarly, but for GE/GEU, which require (const_int 1) as an operand.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (eq:X (match_operand:X 1 "register_operand") (const_int 0))
+ (any_ge:X (match_operand:X 2 "register_operand")
+ (const_int 1))))
+ (clobber (match_operand:X 3 "register_operand"))]
+ "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV"
+ [(set (match_dup 3) (any_ge:X (match_dup 2) (const_int 1)))
+ (set (match_dup 0) (if_then_else:X (ne:X (match_dup 1) (const_int 0))
+ (const_int 0)
+ (match_dup 3)))])
+
+;; Similarly, but for LT/LTU, which allow an arith_operand.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (eq:X (match_operand:X 1 "register_operand") (const_int 0))
+ (any_lt:X (match_operand:X 2 "register_operand")
+ (match_operand:X 3 "arith_operand"))))
+ (clobber (match_operand:X 4 "register_operand"))]
+ "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV"
+ [(set (match_dup 4) (any_lt:X (match_dup 2) (match_dup 3)))
+ (set (match_dup 0) (if_then_else:X (ne:X (match_dup 1) (const_int 0))
+ (const_int 0)
+ (match_dup 4)))])
+
+;; Finally, LE/LEU, which require an sle_operand.
+(define_split
+ [(set (match_operand:X 0 "register_operand")
+ (and:X (eq:X (match_operand:X 1 "register_operand") (const_int 0))
+ (any_le:X (match_operand:X 2 "register_operand")
+ (match_operand:X 3 "sle_operand"))))
+ (clobber (match_operand:X 4 "register_operand"))]
+ "TARGET_ZICOND_LIKE || TARGET_XTHEADCONDMOV"
+ [(set (match_dup 4) (any_le:X (match_dup 2) (match_dup 3)))
+ (set (match_dup 0) (if_then_else:X (ne:X (match_dup 1) (const_int 0))
+ (const_int 0)
+ (match_dup 4)))])
+
+
+
diff --git a/gcc/config/rl78/rl78.cc b/gcc/config/rl78/rl78.cc
index 25f6606..8ce9331 100644
--- a/gcc/config/rl78/rl78.cc
+++ b/gcc/config/rl78/rl78.cc
@@ -1675,7 +1675,7 @@ static void
rl78_start_function (FILE *file)
{
int i;
-
+
add_vector_labels (file, "interrupt");
add_vector_labels (file, "vector");
diff --git a/gcc/config/rs6000/aix.h b/gcc/config/rs6000/aix.h
index 03d39b1..7f6c45e 100644
--- a/gcc/config/rs6000/aix.h
+++ b/gcc/config/rs6000/aix.h
@@ -182,7 +182,7 @@
Don't do this until the fixed IBM assembler is more generally available.
When this becomes permanently defined, the ASM_OUTPUT_EXTERNAL,
ASM_OUTPUT_EXTERNAL_LIBCALL, and RS6000_OUTPUT_BASENAME macros will no
- longer be needed. Also, the extern declaration of mcount in
+ longer be needed. Also, the extern declaration of mcount in
rs6000_xcoff_file_start will no longer be needed. */
/* #define ASM_SPEC "-u %(asm_cpu)" */
diff --git a/gcc/config/rs6000/aix71.h b/gcc/config/rs6000/aix71.h
index 41037b3..4350dcd 100644
--- a/gcc/config/rs6000/aix71.h
+++ b/gcc/config/rs6000/aix71.h
@@ -125,7 +125,7 @@ do { \
%{mpe: -I%R/usr/lpp/ppe.poe/include} \
%{pthread: -D_THREAD_SAFE}"
-/* The GNU C++ standard library requires that these macros be
+/* The GNU C++ standard library requires that these macros be
defined. Synchronize with libstdc++ os_defines.h. */
#define CPLUSPLUS_CPP_SPEC_COMMON \
"-D_ALL_SOURCE -D__COMPATMATH__ \
@@ -257,7 +257,7 @@ do { \
#define LD_INIT_SWITCH "-binitfini"
#ifndef _AIX52
-extern long long int atoll(const char *);
+extern long long int atoll(const char *);
#endif
/* This target uses the aix64.opt file. */
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index c9f9486..dcf0f28 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -35,7 +35,7 @@
#endif
/* If __APPLE_ALTIVEC__ is defined, the compiler supports 'vector',
- 'pixel' and 'bool' as context-sensitive AltiVec keywords (in
+ 'pixel' and 'bool' as context-sensitive AltiVec keywords (in
non-AltiVec contexts, they revert to their original meanings,
if any), so we do not need to define them as macros. Also,
avoid defining them as macros for C++ with strict ANSI, as
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1f5489b..00dad4b 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -170,6 +170,7 @@
UNSPEC_VSTRIL
UNSPEC_SLDB
UNSPEC_SRDB
+ UNSPEC_VECTOR_SHIFT
])
(define_c_enum "unspecv"
@@ -2176,6 +2177,56 @@
"vsro %0,%1,%2"
[(set_attr "type" "vecperm")])
+;; Optimize V2DI shifts by constants. This relies on the shift instructions
+;; only looking at the bits needed to do the shift. This means we can use
+;; VSPLTISW or XXSPLTIB to load up the constant, and not worry about the bits
+;; that the vector shift instructions will not use.
+(define_mode_iterator VSHIFT_MODE [(V4SI "TARGET_P9_VECTOR")
+ (V2DI "TARGET_P8_VECTOR")])
+
+(define_code_iterator vshift_code [ashift ashiftrt lshiftrt])
+(define_code_attr vshift_attr [(ashift "ashift")
+ (ashiftrt "ashiftrt")
+ (lshiftrt "lshiftrt")])
+
+(define_insn_and_split "*altivec_<mode>_<vshift_attr>_const"
+ [(set (match_operand:VSHIFT_MODE 0 "register_operand" "=v")
+ (vshift_code:VSHIFT_MODE
+ (match_operand:VSHIFT_MODE 1 "register_operand" "v")
+ (match_operand:VSHIFT_MODE 2 "vector_shift_constant" "")))
+ (clobber (match_scratch:VSHIFT_MODE 3 "=&v"))]
+ "((<MODE>mode == V2DImode && TARGET_P8_VECTOR)
+ || (<MODE>mode == V4SImode && TARGET_P9_VECTOR))"
+ "#"
+ "&& 1"
+ [(set (match_dup 3)
+ (unspec:VSHIFT_MODE [(match_dup 4)] UNSPEC_VECTOR_SHIFT))
+ (set (match_dup 0)
+ (vshift_code:VSHIFT_MODE (match_dup 1)
+ (match_dup 3)))]
+{
+ if (GET_CODE (operands[3]) == SCRATCH)
+ operands[3] = gen_reg_rtx (<MODE>mode);
+
+ operands[4] = GET_CODE (operands[2]) == CONST_VECTOR
+ ? CONST_VECTOR_ELT (operands[2], 0)
+ : XEXP (operands[2], 0);
+})
+
+(define_insn "*altivec_<mode>_shift_const"
+ [(set (match_operand:VSHIFT_MODE 0 "register_operand" "=v")
+ (unspec:VSHIFT_MODE [(match_operand 1 "const_int_operand" "n")]
+ UNSPEC_VECTOR_SHIFT))]
+ "TARGET_P8_VECTOR"
+{
+ if (UINTVAL (operands[1]) <= 15)
+ return "vspltisw %0,%1";
+ else if (TARGET_P9_VECTOR)
+ return "xxspltib %x0,%1";
+ else
+ gcc_unreachable ();
+})
+
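
A hedged example of the kind of shift the new patterns catch, written in GCC's generic vector extension syntax; which splat instruction is chosen depends on whether ISA 3.0's xxspltib is available:

/* Hypothetical example: an element-wise shift of a V2DI vector by a
   constant.  The shift instruction only uses the low-order bits of each
   count element, so the constant 40 can be splatted with xxspltib
   (or vspltisw for values 0..15) instead of being loaded from the
   literal pool.  */
typedef unsigned long long v2di __attribute__ ((vector_size (16)));

v2di
shift40 (v2di x)
{
  return x << 40;
}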
(define_insn "altivec_vsum4ubs"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V16QI 1 "register_operand" "v")
@@ -3698,7 +3749,7 @@
}
})
-(define_expand "udot_prod<mode>"
+(define_expand "udot_prodv4si<mode>"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
(unspec:V4SI [(match_operand:VIshort 1 "register_operand" "v")
@@ -3710,7 +3761,7 @@
DONE;
})
-(define_expand "sdot_prodv8hi"
+(define_expand "sdot_prodv4siv8hi"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(plus:V4SI (match_operand:V4SI 3 "register_operand" "v")
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")
diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h
index 6b9e4e0..1303c9d 100644
--- a/gcc/config/rs6000/amo.h
+++ b/gcc/config/rs6000/amo.h
@@ -46,7 +46,7 @@ enum _AMO_LD {
_AMO_LD_CS_NE = 0x10, /* Compare and Swap Not Equal. */
_AMO_LD_INC_BOUNDED = 0x18, /* Fetch and Increment Bounded. */
_AMO_LD_INC_EQUAL = 0x19, /* Fetch and Increment Equal. */
- _AMO_LD_DEC_BOUNDED = 0x1A /* Fetch and Decrement Bounded. */
+ _AMO_LD_DEC_BOUNDED = 0x1C /* Fetch and Decrement Bounded. */
};
/* Implementation of the simple LWAT/LDAT operations that take one register and
diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h
index e8b1949..bd5a016 100644
--- a/gcc/config/rs6000/darwin.h
+++ b/gcc/config/rs6000/darwin.h
@@ -487,14 +487,14 @@
default, as kernel code doesn't save/restore those registers. */
#define OS_MISSING_ALTIVEC (flag_mkernel || flag_apple_kext)
-/* Darwin has support for section anchors on powerpc*.
+/* Darwin has support for section anchors on powerpc*.
It is disabled for any section containing a "zero-sized item" (because these
are re-written as size=1 to be compatible with the OSX ld64).
The re-writing would interfere with the computation of anchor offsets.
Therefore, we place zero-sized items in their own sections and make such
sections unavailable to section anchoring. */
-#undef TARGET_ASM_OUTPUT_ANCHOR
+#undef TARGET_ASM_OUTPUT_ANCHOR
#define TARGET_ASM_OUTPUT_ANCHOR darwin_asm_output_anchor
#undef TARGET_USE_ANCHORS_FOR_SYMBOL_P
diff --git a/gcc/config/rs6000/driver-rs6000.cc b/gcc/config/rs6000/driver-rs6000.cc
index f490072..a054827 100644
--- a/gcc/config/rs6000/driver-rs6000.cc
+++ b/gcc/config/rs6000/driver-rs6000.cc
@@ -19,6 +19,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -628,7 +629,7 @@ host_detect_local_cpu (int argc, const char **argv)
arch = strcmp (argv[0], "cpu") == 0;
if (!arch && strcmp (argv[0], "tune"))
return NULL;
-
+
if (arch)
cpu = "powerpc";
diff --git a/gcc/config/rs6000/freebsd.h b/gcc/config/rs6000/freebsd.h
index 0f42dc1..0c83e9e 100644
--- a/gcc/config/rs6000/freebsd.h
+++ b/gcc/config/rs6000/freebsd.h
@@ -50,7 +50,7 @@
/************************[ Target stuff ]***********************************/
-/* Define the actual types of some ANSI-mandated types.
+/* Define the actual types of some ANSI-mandated types.
Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.cc,
c-common.cc, and config/<arch>/<arch>.h. */
diff --git a/gcc/config/rs6000/freebsd64.h b/gcc/config/rs6000/freebsd64.h
index 6740170..627fd42 100644
--- a/gcc/config/rs6000/freebsd64.h
+++ b/gcc/config/rs6000/freebsd64.h
@@ -237,7 +237,7 @@ extern int dot_symbols;
/************************[ Target stuff ]***********************************/
-/* Define the actual types of some ANSI-mandated types.
+/* Define the actual types of some ANSI-mandated types.
Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.cc,
c-common.cc, and config/<arch>/<arch>.h. */
diff --git a/gcc/config/rs6000/host-darwin.cc b/gcc/config/rs6000/host-darwin.cc
index e000177..fa9140d 100644
--- a/gcc/config/rs6000/host-darwin.cc
+++ b/gcc/config/rs6000/host-darwin.cc
@@ -19,6 +19,7 @@
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -92,7 +93,7 @@ segv_handler (int sig ATTRIBUTE_UNUSED,
|| (faulting_insn & 0xFC1F8000) == 0xBC018000 /* stmw xxx, -yyy(%r1) */)
{
char *shell_name;
-
+
fnotice (stderr, "Out of stack space.\n");
shell_name = getenv ("SHELL");
if (shell_name != NULL)
@@ -109,23 +110,23 @@ segv_handler (int sig ATTRIBUTE_UNUSED,
{ "zsh", "limit stacksize 32m" }
};
size_t i;
-
+
for (i = 0; i < ARRAY_SIZE (shell_commands); i++)
if (strcmp (shell_commands[i][0], shell_name + 1) == 0)
{
- fnotice (stderr,
+ fnotice (stderr,
"Try running '%s' in the shell to raise its limit.\n",
shell_commands[i][1]);
}
}
-
+
if (global_dc->m_abort_on_error)
fancy_abort (__FILE__, __LINE__, __FUNCTION__);
exit (FATAL_EXIT_CODE);
}
- fprintf (stderr, "[address=%08lx pc=%08x]\n",
+ fprintf (stderr, "[address=%08lx pc=%08x]\n",
uc->uc_mcontext->MC_FLD(es).MC_FLD(dar),
uc->uc_mcontext->MC_FLD(ss).MC_FLD(srr0));
internal_error ("segmentation fault");
@@ -147,7 +148,7 @@ darwin_rs6000_extra_signals (void)
sigemptyset(&sact.sa_mask);
sact.sa_flags = SA_ONSTACK | SA_SIGINFO;
sact.sa_sigaction = segv_handler;
- if (sigaction (SIGSEGV, &sact, 0) < 0)
+ if (sigaction (SIGSEGV, &sact, 0) < 0)
fatal_error (input_location, "While setting up signal handler: %m");
}
diff --git a/gcc/config/rs6000/linux.h b/gcc/config/rs6000/linux.h
index 5f6cede..d247f41 100644
--- a/gcc/config/rs6000/linux.h
+++ b/gcc/config/rs6000/linux.h
@@ -116,7 +116,7 @@
/* We are 32-bit all the time, so optimize a little. */
#undef TARGET_64BIT
#define TARGET_64BIT 0
-
+
/* We don't need to generate entries in .fixup, except when
-mrelocatable or -mrelocatable-lib is given. */
#undef RELOCATABLE_NEEDS_FIXUP
diff --git a/gcc/config/rs6000/mmintrin.h b/gcc/config/rs6000/mmintrin.h
index c7988c1..68c06aa 100644
--- a/gcc/config/rs6000/mmintrin.h
+++ b/gcc/config/rs6000/mmintrin.h
@@ -743,14 +743,14 @@ _mm_cmpeq_pi8 (__m64 __m1, __m64 __m2)
__mu1.as_m64 = __m1;
__mu2.as_m64 = __m2;
- __res.as_char[0] = (__mu1.as_char[0] == __mu2.as_char[0])? -1: 0;
- __res.as_char[1] = (__mu1.as_char[1] == __mu2.as_char[1])? -1: 0;
- __res.as_char[2] = (__mu1.as_char[2] == __mu2.as_char[2])? -1: 0;
- __res.as_char[3] = (__mu1.as_char[3] == __mu2.as_char[3])? -1: 0;
- __res.as_char[4] = (__mu1.as_char[4] == __mu2.as_char[4])? -1: 0;
- __res.as_char[5] = (__mu1.as_char[5] == __mu2.as_char[5])? -1: 0;
- __res.as_char[6] = (__mu1.as_char[6] == __mu2.as_char[6])? -1: 0;
- __res.as_char[7] = (__mu1.as_char[7] == __mu2.as_char[7])? -1: 0;
+ __res.as_char[0] = (__mu1.as_char[0] == __mu2.as_char[0]) ? -1 : 0;
+ __res.as_char[1] = (__mu1.as_char[1] == __mu2.as_char[1]) ? -1 : 0;
+ __res.as_char[2] = (__mu1.as_char[2] == __mu2.as_char[2]) ? -1 : 0;
+ __res.as_char[3] = (__mu1.as_char[3] == __mu2.as_char[3]) ? -1 : 0;
+ __res.as_char[4] = (__mu1.as_char[4] == __mu2.as_char[4]) ? -1 : 0;
+ __res.as_char[5] = (__mu1.as_char[5] == __mu2.as_char[5]) ? -1 : 0;
+ __res.as_char[6] = (__mu1.as_char[6] == __mu2.as_char[6]) ? -1 : 0;
+ __res.as_char[7] = (__mu1.as_char[7] == __mu2.as_char[7]) ? -1 : 0;
return (__m64) __res.as_m64;
#endif
@@ -778,14 +778,14 @@ _mm_cmpgt_pi8 (__m64 __m1, __m64 __m2)
__mu1.as_m64 = __m1;
__mu2.as_m64 = __m2;
- __res.as_char[0] = (__mu1.as_char[0] > __mu2.as_char[0])? -1: 0;
- __res.as_char[1] = (__mu1.as_char[1] > __mu2.as_char[1])? -1: 0;
- __res.as_char[2] = (__mu1.as_char[2] > __mu2.as_char[2])? -1: 0;
- __res.as_char[3] = (__mu1.as_char[3] > __mu2.as_char[3])? -1: 0;
- __res.as_char[4] = (__mu1.as_char[4] > __mu2.as_char[4])? -1: 0;
- __res.as_char[5] = (__mu1.as_char[5] > __mu2.as_char[5])? -1: 0;
- __res.as_char[6] = (__mu1.as_char[6] > __mu2.as_char[6])? -1: 0;
- __res.as_char[7] = (__mu1.as_char[7] > __mu2.as_char[7])? -1: 0;
+ __res.as_char[0] = (__mu1.as_char[0] > __mu2.as_char[0]) ? -1 : 0;
+ __res.as_char[1] = (__mu1.as_char[1] > __mu2.as_char[1]) ? -1 : 0;
+ __res.as_char[2] = (__mu1.as_char[2] > __mu2.as_char[2]) ? -1 : 0;
+ __res.as_char[3] = (__mu1.as_char[3] > __mu2.as_char[3]) ? -1 : 0;
+ __res.as_char[4] = (__mu1.as_char[4] > __mu2.as_char[4]) ? -1 : 0;
+ __res.as_char[5] = (__mu1.as_char[5] > __mu2.as_char[5]) ? -1 : 0;
+ __res.as_char[6] = (__mu1.as_char[6] > __mu2.as_char[6]) ? -1 : 0;
+ __res.as_char[7] = (__mu1.as_char[7] > __mu2.as_char[7]) ? -1 : 0;
return (__m64) __res.as_m64;
#endif
@@ -815,10 +815,10 @@ _mm_cmpeq_pi16 (__m64 __m1, __m64 __m2)
__mu1.as_m64 = __m1;
__mu2.as_m64 = __m2;
- __res.as_short[0] = (__mu1.as_short[0] == __mu2.as_short[0])? -1: 0;
- __res.as_short[1] = (__mu1.as_short[1] == __mu2.as_short[1])? -1: 0;
- __res.as_short[2] = (__mu1.as_short[2] == __mu2.as_short[2])? -1: 0;
- __res.as_short[3] = (__mu1.as_short[3] == __mu2.as_short[3])? -1: 0;
+ __res.as_short[0] = (__mu1.as_short[0] == __mu2.as_short[0]) ? -1 : 0;
+ __res.as_short[1] = (__mu1.as_short[1] == __mu2.as_short[1]) ? -1 : 0;
+ __res.as_short[2] = (__mu1.as_short[2] == __mu2.as_short[2]) ? -1 : 0;
+ __res.as_short[3] = (__mu1.as_short[3] == __mu2.as_short[3]) ? -1 : 0;
return (__m64) __res.as_m64;
#endif
@@ -846,10 +846,10 @@ _mm_cmpgt_pi16 (__m64 __m1, __m64 __m2)
__mu1.as_m64 = __m1;
__mu2.as_m64 = __m2;
- __res.as_short[0] = (__mu1.as_short[0] > __mu2.as_short[0])? -1: 0;
- __res.as_short[1] = (__mu1.as_short[1] > __mu2.as_short[1])? -1: 0;
- __res.as_short[2] = (__mu1.as_short[2] > __mu2.as_short[2])? -1: 0;
- __res.as_short[3] = (__mu1.as_short[3] > __mu2.as_short[3])? -1: 0;
+ __res.as_short[0] = (__mu1.as_short[0] > __mu2.as_short[0]) ? -1 : 0;
+ __res.as_short[1] = (__mu1.as_short[1] > __mu2.as_short[1]) ? -1 : 0;
+ __res.as_short[2] = (__mu1.as_short[2] > __mu2.as_short[2]) ? -1 : 0;
+ __res.as_short[3] = (__mu1.as_short[3] > __mu2.as_short[3]) ? -1 : 0;
return (__m64) __res.as_m64;
#endif
@@ -879,8 +879,8 @@ _mm_cmpeq_pi32 (__m64 __m1, __m64 __m2)
__mu1.as_m64 = __m1;
__mu2.as_m64 = __m2;
- __res.as_int[0] = (__mu1.as_int[0] == __mu2.as_int[0])? -1: 0;
- __res.as_int[1] = (__mu1.as_int[1] == __mu2.as_int[1])? -1: 0;
+ __res.as_int[0] = (__mu1.as_int[0] == __mu2.as_int[0]) ? -1 : 0;
+ __res.as_int[1] = (__mu1.as_int[1] == __mu2.as_int[1]) ? -1 : 0;
return (__m64) __res.as_m64;
#endif
@@ -908,8 +908,8 @@ _mm_cmpgt_pi32 (__m64 __m1, __m64 __m2)
__mu1.as_m64 = __m1;
__mu2.as_m64 = __m2;
- __res.as_int[0] = (__mu1.as_int[0] > __mu2.as_int[0])? -1: 0;
- __res.as_int[1] = (__mu1.as_int[1] > __mu2.as_int[1])? -1: 0;
+ __res.as_int[0] = (__mu1.as_int[0] > __mu2.as_int[0]) ? -1 : 0;
+ __res.as_int[1] = (__mu1.as_int[1] > __mu2.as_int[1]) ? -1 : 0;
return (__m64) __res.as_m64;
#endif
diff --git a/gcc/config/rs6000/ppu_intrinsics.h b/gcc/config/rs6000/ppu_intrinsics.h
index 4d91c72..9ac1caf 100644
--- a/gcc/config/rs6000/ppu_intrinsics.h
+++ b/gcc/config/rs6000/ppu_intrinsics.h
@@ -34,7 +34,7 @@
#ifdef __cplusplus
extern "C" {
-#endif
+#endif
/*
* unsigned int __cntlzw(unsigned int)
@@ -113,7 +113,7 @@ extern "C" {
* void __mtfsb1(int)
* double __setflm(double)
*
- * dcbt intrinsics
+ * dcbt intrinsics
* void __protected_unlimited_stream_set (unsigned int direction, const void *add, unsigned int ID)
* void __protected_stream_set (unsigned int direction, const void *add, unsigned int ID)
* void __protected_stream_stop_all (void)
@@ -178,7 +178,7 @@ typedef int __V4SI __attribute__((vector_size(16)));
#ifdef __powerpc64__
#define __mtspr(spr, value) \
__asm__ volatile ("mtspr %0,%1" : : "n" (spr), "r" (value))
-
+
#define __mfspr(spr) __extension__ \
({ unsigned long long result; \
__asm__ volatile ("mfspr %0,%1" : "=r" (result) : "n" (spr)); \
@@ -211,7 +211,7 @@ typedef int __V4SI __attribute__((vector_size(16)));
#define __dcbf(base) \
__asm__ volatile ("dcbf %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
-
+
#define __dcbz(base) \
__asm__ volatile ("dcbz %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
@@ -226,7 +226,7 @@ typedef int __V4SI __attribute__((vector_size(16)));
#define __icbi(base) \
__asm__ volatile ("icbi %y0" : "=Z" (*(__V4SI*) (base)) : : "memory")
-
+
#define __dcbt_TH1000(EATRUNC, D, UG, ID) \
__asm__ volatile ("dcbt %y0,8" \
: "=Z" (*(__V4SI*) (__SIZE_TYPE__)((((__SIZE_TYPE__) (EATRUNC)) & ~0x7F) \
@@ -390,7 +390,7 @@ typedef int __V4SI __attribute__((vector_size(16)));
#define __mtfsf(mask,value) \
__asm__ volatile ("mtfsf %0,%1" : : "n" (mask), "d" ((double) (value)))
-
+
#define __mtfsfi(bits,field) \
__asm__ volatile ("mtfsfi %0,%1" : : "n" (bits), "n" (field))
@@ -406,10 +406,10 @@ typedef int __V4SI __attribute__((vector_size(16)));
/* __builtin_fabs may perform unnecessary rounding. */
-/* Rename __fabs and __fabsf to work around internal prototypes defined
- in bits/mathcalls.h with some glibc versions. */
-#define __fabs __ppu_fabs
-#define __fabsf __ppu_fabsf
+/* Rename __fabs and __fabsf to work around internal prototypes defined
+ in bits/mathcalls.h with some glibc versions. */
+#define __fabs __ppu_fabs
+#define __fabsf __ppu_fabsf
static __inline__ double __fabs(double x) __attribute__((always_inline));
static __inline__ double
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 7f0b4ab..0b78901 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -861,6 +861,69 @@
return op == CONST0_RTX (mode) || op == CONSTM1_RTX (mode);
})
+;; Return 1 if the operand is a V2DI or V4SI const_vector, where each element
+;; is the same constant, and the constant can be used for a shift operation.
+;; This is to prevent sub-optimal code that needs to load up the constant
+;; into a GPR and then splat it into a 32 or 64-bit vector, or load the
+;; constant from the literal pool.
+;;
+;; For V4SImode, we only recognize shifts by 16..31 on ISA 3.0, since shifts by
+;; 1..15 can be handled by the normal VSPLTISW and vector shift instruction.
+;; For V2DImode, we do this all of the time, since there is no convenient
+;; instruction to load up a vector long long splatted constant.
+;;
+;; If we can use XXSPLTIB, then allow constants up to 63. If not, we restrict
+;; the constant to 0..15, which can be loaded with VSPLTISW.  V4SI shifts are
+;; only optimized for ISA 3.0 when the shift value is >= 16 and <= 31. Values
+;; between 0 and 15 can use a normal VSPLTISW to load the value, and it doesn't
+;; need this optimization.
+(define_predicate "vector_shift_constant"
+ (match_code "const_vector,vec_duplicate")
+{
+ unsigned HOST_WIDE_INT min_value;
+
+ if (mode == V2DImode)
+ {
+ min_value = 0;
+ if (!TARGET_P8_VECTOR)
+ return 0;
+ }
+ else if (mode == V4SImode)
+ {
+ min_value = 16;
+ if (!TARGET_P9_VECTOR)
+ return 0;
+ }
+ else
+ return 0;
+
+ unsigned HOST_WIDE_INT max_value = TARGET_P9_VECTOR ? 63 : 15;
+
+ if (GET_CODE (op) == CONST_VECTOR)
+ {
+ unsigned HOST_WIDE_INT first = UINTVAL (CONST_VECTOR_ELT (op, 0));
+ unsigned nunits = GET_MODE_NUNITS (mode);
+ unsigned i;
+
+ if (!IN_RANGE (first, min_value, max_value))
+ return 0;
+
+ for (i = 1; i < nunits; i++)
+ if (first != UINTVAL (CONST_VECTOR_ELT (op, i)))
+ return 0;
+
+ return 1;
+ }
+ else
+ {
+ rtx op0 = XEXP (op, 0);
+ if (!CONST_INT_P (op0))
+ return 0;
+
+ return IN_RANGE (UINTVAL (op0), min_value, max_value);
+ }
+})
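
A small C sketch of the acceptance rules the predicate implements, for readers who want them without the RTL plumbing (the helper is hypothetical, not part of GCC):

/* Hypothetical model of vector_shift_constant: a splatted shift count is
   usable if every element is the same and falls in the allowed range.
   V2DI needs power8; V4SI is only handled for 16..31 and needs power9.
   The upper bound is 63 with xxspltib (power9), otherwise 15.  */
static int
shift_splat_ok_p (const unsigned long long *elts, int nelts, int is_v2di,
                  int have_p8, int have_p9)
{
  unsigned long long lo = is_v2di ? 0 : 16;
  unsigned long long hi = have_p9 ? 63 : 15;
  if (is_v2di ? !have_p8 : !have_p9)
    return 0;
  if (elts[0] < lo || elts[0] > hi)
    return 0;
  for (int i = 1; i < nelts; i++)
    if (elts[i] != elts[0])
      return 0;
  return 1;
}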
+
;; Return 1 if operand is 0.0.
(define_predicate "zero_fp_constant"
(and (match_code "const_double")
diff --git a/gcc/config/rs6000/rs6000-c.cc b/gcc/config/rs6000/rs6000-c.cc
index 04882c3..4dc80e5 100644
--- a/gcc/config/rs6000/rs6000-c.cc
+++ b/gcc/config/rs6000/rs6000-c.cc
@@ -22,6 +22,7 @@
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -808,7 +809,7 @@ is_float128_p (tree t)
&& TARGET_LONG_DOUBLE_128
&& t == long_double_type_node));
}
-
+
/* Return true iff ARGTYPE can be compatibly passed as PARMTYPE. */
static bool
diff --git a/gcc/config/rs6000/rs6000-call.cc b/gcc/config/rs6000/rs6000-call.cc
index a039ff7..549fa57 100644
--- a/gcc/config/rs6000/rs6000-call.cc
+++ b/gcc/config/rs6000/rs6000-call.cc
@@ -407,15 +407,15 @@ rs6000_discover_homogeneous_aggregate (machine_mode mode, const_tree type,
The AIX ABI for the RS/6000 specifies that all structures are
returned in memory. The Darwin ABI does the same.
-
+
For the Darwin 64 Bit ABI, a function result can be returned in
registers or in memory, depending on the size of the return data
type. If it is returned in registers, the value occupies the same
registers as it would if it were the first and only function
argument. Otherwise, the function places its result in memory at
the location pointed to by GPR3.
-
- The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
+
+ The SVR4 ABI specifies that structures <= 8 bytes are returned in r3/r4,
but a draft put them in memory, and GCC used to implement the draft
instead of the final standard. Therefore, aix_struct_return
controls this instead of DEFAULT_ABI; V.4 targets needing backward
@@ -1045,10 +1045,10 @@ int
rs6000_darwin64_struct_check_p (machine_mode mode, const_tree type)
{
return rs6000_darwin64_abi
- && ((mode == BLKmode
- && TREE_CODE (type) == RECORD_TYPE
+ && ((mode == BLKmode
+ && TREE_CODE (type) == RECORD_TYPE
&& int_size_in_bytes (type) > 0)
- || (type && TREE_CODE (type) == RECORD_TYPE
+ || (type && TREE_CODE (type) == RECORD_TYPE
&& int_size_in_bytes (type) == 8)) ? 1 : 0;
}
@@ -1178,7 +1178,7 @@ rs6000_function_arg_advance_1 (CUMULATIVE_ARGS *cum, machine_mode mode,
{
fprintf (stderr, "function_adv: words = %2d, align=%d, size=%d",
cum->words, TYPE_ALIGN (type), size);
- fprintf (stderr,
+ fprintf (stderr,
"nargs = %4d, proto = %d, mode = %4s (darwin64 abi)\n",
cum->nargs_prototype, cum->prototype,
GET_MODE_NAME (mode));
@@ -2568,9 +2568,9 @@ rs6000_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
/* We need to deal with the fact that the darwin ppc64 ABI is defined by an
earlier version of gcc, with the property that it always applied alignment
adjustments to the va-args (even for zero-sized types). The cheapest way
- to deal with this is to replicate the effect of the part of
- std_gimplify_va_arg_expr that carries out the align adjust, for the case
- of relevance.
+ to deal with this is to replicate the effect of the part of
+ std_gimplify_va_arg_expr that carries out the align adjust, for the case
+ of relevance.
We don't need to check for pass-by-reference because of the test above.
We can return a simplifed answer, since we know there's no offset to add. */
diff --git a/gcc/config/rs6000/rs6000-internal.h b/gcc/config/rs6000/rs6000-internal.h
index 3a6cc31..0f6e1fd 100644
--- a/gcc/config/rs6000/rs6000-internal.h
+++ b/gcc/config/rs6000/rs6000-internal.h
@@ -149,7 +149,7 @@ extern machine_mode rs6000_promote_function_mode (const_tree type ATTRIBUTE_UNUS
machine_mode mode,
int *punsignedp ATTRIBUTE_UNUSED,
const_tree, int);
-extern bool rs6000_return_in_memory (const_tree type,
+extern bool rs6000_return_in_memory (const_tree type,
const_tree fntype ATTRIBUTE_UNUSED);
extern bool rs6000_return_in_msb (const_tree valtype);
extern bool rs6000_pass_by_reference (cumulative_args_t,
diff --git a/gcc/config/rs6000/rs6000-logue.cc b/gcc/config/rs6000/rs6000-logue.cc
index fdb6414..c87058b 100644
--- a/gcc/config/rs6000/rs6000-logue.cc
+++ b/gcc/config/rs6000/rs6000-logue.cc
@@ -1376,7 +1376,7 @@ rs6000_emit_eh_reg_restore (rtx source, rtx scratch)
/* Freeze lr_save_p. We've just emitted rtl that depends on the
state of lr_save_p so any change from here on would be a bug. In
particular, stop rs6000_ra_ever_killed from considering the SET
- of lr we may have added just above. */
+ of lr we may have added just above. */
cfun->machine->lr_save_state = info->lr_save_p + 1;
}
@@ -1462,7 +1462,7 @@ rs6000_emit_stack_tie (rtx fp, bool hard_frame_needed)
/* Allocate SIZE_INT bytes on the stack using a store with update style insn
and set the appropriate attributes for the generated insn. Return the
first insn which adjusts the stack pointer or the last insn before
- the stack adjustment loop.
+ the stack adjustment loop.
SIZE_INT is used to create the CFI note for the allocation.
@@ -1487,7 +1487,7 @@ rs6000_emit_allocate_stack_1 (HOST_WIDE_INT size_int, rtx orig_sp)
try_split (PATTERN (insn), insn, 0);
size_rtx = tmp_reg;
}
-
+
if (TARGET_32BIT)
insn = emit_insn (gen_movsi_update_stack (stack_pointer_rtx,
stack_pointer_rtx,
@@ -4689,7 +4689,7 @@ rs6000_emit_epilogue (enum epilogue_type epilogue_type)
if (newptr_regno != 1 && REGNO (frame_reg_rtx) != newptr_regno)
frame_reg_rtx = gen_rtx_REG (Pmode, newptr_regno);
-
+
if (end_save + ptr_off != 0)
{
rtx offset = GEN_INT (end_save + ptr_off);
diff --git a/gcc/config/rs6000/rs6000-overload.def b/gcc/config/rs6000/rs6000-overload.def
index 87495ad..7d9e31c 100644
--- a/gcc/config/rs6000/rs6000-overload.def
+++ b/gcc/config/rs6000/rs6000-overload.def
@@ -4403,12 +4403,20 @@
XXEVAL XXEVAL_VUQ
[VEC_TEST_LSBB_ALL_ONES, vec_test_lsbb_all_ones, __builtin_vec_xvtlsbb_all_ones]
+ signed int __builtin_vec_xvtlsbb_all_ones (vsc);
+ XVTLSBB_ONES LSBB_ALL_ONES_VSC
signed int __builtin_vec_xvtlsbb_all_ones (vuc);
- XVTLSBB_ONES
+ XVTLSBB_ONES LSBB_ALL_ONES_VUC
+ signed int __builtin_vec_xvtlsbb_all_ones (vbc);
+ XVTLSBB_ONES LSBB_ALL_ONES_VBC
[VEC_TEST_LSBB_ALL_ZEROS, vec_test_lsbb_all_zeros, __builtin_vec_xvtlsbb_all_zeros]
+ signed int __builtin_vec_xvtlsbb_all_zeros (vsc);
+ XVTLSBB_ZEROS LSBB_ALL_ZEROS_VSC
signed int __builtin_vec_xvtlsbb_all_zeros (vuc);
- XVTLSBB_ZEROS
+ XVTLSBB_ZEROS LSBB_ALL_ZEROS_VUC
+ signed int __builtin_vec_xvtlsbb_all_zeros (vbc);
+ XVTLSBB_ZEROS LSBB_ALL_ZEROS_VBC
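
With the added entries, the lsbb test builtins accept signed and bool char vectors in addition to unsigned char; a hedged usage sketch (assumes a Power10 target with the altivec.h intrinsics available):

/* Hypothetical usage of the widened overloads.  */
#include <altivec.h>

int
all_lsb_ones (vector signed char v)
{
  return vec_test_lsbb_all_ones (v);   /* previously only vector unsigned
                                          char was accepted  */
}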
[VEC_TRUNC, vec_trunc, __builtin_vec_trunc]
vf __builtin_vec_trunc (vf);
diff --git a/gcc/config/rs6000/rs6000-p8swap.cc b/gcc/config/rs6000/rs6000-p8swap.cc
index 05fb760..614cecc 100644
--- a/gcc/config/rs6000/rs6000-p8swap.cc
+++ b/gcc/config/rs6000/rs6000-p8swap.cc
@@ -133,7 +133,7 @@
already in a register. In some cases, this mask may be a constant
that we can discover with ud-chains, in which case the above
transformation is ok. However, the common usage here is for the
- mask to be produced by an UNSPEC_LVSL, in which case the mask
+ mask to be produced by an UNSPEC_LVSL, in which case the mask
cannot be known at compile time. In such a case we would have to
generate several instructions to compute M' as above at run time,
and a cost model is needed again.
@@ -634,7 +634,7 @@ v2df_reduction_p (rtx op)
{
if (GET_MODE (op) != V2DFmode)
return false;
-
+
enum rtx_code code = GET_CODE (op);
if (code != PLUS && code != SMIN && code != SMAX)
return false;
@@ -913,7 +913,7 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
return 0;
if (GET_CODE (XEXP (lhs, 0)) == AND)
return 0;
-
+
*special = SH_NOSWAP_ST;
return 1;
}
@@ -1355,7 +1355,7 @@ adjust_vperm (rtx_insn *insn)
break;
}
gcc_assert (swap_insn);
-
+
/* Find the load. */
insn_info = DF_INSN_INFO_GET (swap_insn);
rtx_insn *load_insn = 0;
@@ -2094,7 +2094,7 @@ alignment_with_canonical_addr (rtx align)
return gen_rtx_AND (GET_MODE (align), canon, GEN_INT (-16));
}
-/* Check whether an rtx is an alignment mask, and if so, return
+/* Check whether an rtx is an alignment mask, and if so, return
a fully-expanded rtx for the masking operation. */
static rtx
alignment_mask (rtx_insn *insn)
@@ -2397,7 +2397,7 @@ recombine_lvx_stvx_patterns (function *fun)
remove_insn (to_delete[i].replace_insn);
to_delete[i].replace_insn->set_deleted ();
}
-
+
free (to_delete);
}
diff --git a/gcc/config/rs6000/rs6000-string.cc b/gcc/config/rs6000/rs6000-string.cc
index 55b4133..de618da 100644
--- a/gcc/config/rs6000/rs6000-string.cc
+++ b/gcc/config/rs6000/rs6000-string.cc
@@ -1337,7 +1337,7 @@ expand_compare_loop (rtx operands[])
{
/* If remainder length < word length, branch to final
cleanup compare. */
-
+
if (!bytes_is_const)
{
do_ifelse (CCmode, LT, cmp_rem, GEN_INT (load_mode_size),
@@ -2695,7 +2695,7 @@ gen_lvx_v4si_move (rtx dest, rtx src)
if (MEM_P (dest))
return gen_altivec_stvx_v4si_internal (dest, src);
- else
+ else
return gen_altivec_lvx_v4si_internal (dest, src);
}
@@ -2918,7 +2918,7 @@ expand_block_move (rtx operands[], bool might_overlap)
emit_insn (stores[i]);
num_reg = 0;
}
-
+
}
return 1;
diff --git a/gcc/config/rs6000/rs6000.cc b/gcc/config/rs6000/rs6000.cc
index 94c0db4..950fd94 100644
--- a/gcc/config/rs6000/rs6000.cc
+++ b/gcc/config/rs6000/rs6000.cc
@@ -22,6 +22,7 @@
#define IN_TARGET_CODE 1
#include "config.h"
+#define INCLUDE_MEMORY
#include "system.h"
#include "coretypes.h"
#include "backend.h"
@@ -4317,10 +4318,10 @@ rs6000_option_override_internal (bool global_init_p)
}
}
- /* Set the Darwin64 ABI as default for 64-bit Darwin.
+ /* Set the Darwin64 ABI as default for 64-bit Darwin.
So far, the only darwin64 targets are also MACH-O. */
if (TARGET_MACHO
- && DEFAULT_ABI == ABI_DARWIN
+ && DEFAULT_ABI == ABI_DARWIN
&& TARGET_64BIT)
{
if (main_target_opt != NULL && !main_target_opt->x_rs6000_darwin64_abi)
@@ -4945,7 +4946,7 @@ rs6000_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_pac
}
/* Return true if the vector misalignment factor is supported by the
- target. */
+ target. */
static bool
rs6000_builtin_support_vector_misalignment (machine_mode mode,
const_tree type,
@@ -8073,7 +8074,7 @@ rs6000_split_vec_extract_var (rtx dest, rtx src, rtx element, rtx tmp_gpr,
/* Return alignment of TYPE. Existing alignment is ALIGN. HOW
selects whether the alignment is abi mandated, optional, or
both abi and optional alignment. */
-
+
unsigned int
rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
{
@@ -8698,7 +8699,7 @@ virtual_stack_registers_memory_p (rtx op)
to determine whether -mcmodel=medium code can use TOC pointer
relative addressing for OP. This means the alignment of the TOC
pointer must also be taken into account, and unfortunately that is
- only 8 bytes. */
+ only 8 bytes. */
#ifndef POWERPC64_TOC_POINTER_ALIGNMENT
#define POWERPC64_TOC_POINTER_ALIGNMENT 8
@@ -8846,8 +8847,8 @@ static const_rtx tocrel_base_oac, tocrel_offset_oac;
/* Return true if OP is a toc pointer relative address (the output
of create_TOC_reference). If STRICT, do not match non-split
- -mcmodel=large/medium toc pointer relative addresses. If the pointers
- are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
+ -mcmodel=large/medium toc pointer relative addresses. If the pointers
+ are non-NULL, place base and offset pieces in TOCREL_BASE_RET and
TOCREL_OFFSET_RET respectively. */
bool
@@ -9574,7 +9575,7 @@ rs6000_legitimize_tls_address_aix (rtx addr, enum tls_model model)
tocref = create_TOC_reference (modaddr, NULL_RTX);
rtx modmem = gen_const_mem (Pmode, tocref);
set_mem_alias_set (modmem, get_TOC_alias_set ());
-
+
rtx modreg = gen_reg_rtx (Pmode);
emit_insn (gen_rtx_SET (modreg, modmem));
@@ -10138,13 +10139,13 @@ rs6000_offsettable_memref_p (rtx op, machine_mode reg_mode, bool strict)
This takes into account how many parallel operations we
can actually do of a given type, and also the latency.
P8:
- int add/sub 6/cycle
+ int add/sub 6/cycle
mul 2/cycle
vect add/sub/mul 2/cycle
fp add/sub/mul 2/cycle
dfp 1/cycle
*/
-
+
static int
rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
machine_mode mode)
@@ -10159,7 +10160,7 @@ rs6000_reassociation_width (unsigned int opc ATTRIBUTE_UNUSED,
return 1;
if (VECTOR_MODE_P (mode))
return 4;
- if (INTEGRAL_MODE_P (mode))
+ if (INTEGRAL_MODE_P (mode))
return 1;
if (FLOAT_MODE_P (mode))
return 4;
@@ -14480,7 +14481,7 @@ print_operand (FILE *file, rtx x, int code)
? reg - 32
: reg - FIRST_ALTIVEC_REGNO + 32);
-#ifdef TARGET_REGNAMES
+#ifdef TARGET_REGNAMES
if (TARGET_REGNAMES)
fprintf (file, "%%vs%d", vsx_reg);
else
@@ -17329,9 +17330,9 @@ static char *
rs6000_offload_options (void)
{
if (TARGET_64BIT)
- return xstrdup ("-foffload-abi=lp64");
+ return xstrdup ("-foffload-abi=lp64 -foffload-abi-host-opts=-m64");
else
- return xstrdup ("-foffload-abi=ilp32");
+ return xstrdup ("-foffload-abi=ilp32 -foffload-abi-host-opts=-m32");
}
@@ -21226,7 +21227,7 @@ rs6000_darwin_file_start (void)
darwin_file_start ();
/* Determine the argument to -mcpu=. Default to G3 if not specified. */
-
+
if (rs6000_default_cpu != 0 && rs6000_default_cpu[0] != '\0')
cpu_id = rs6000_default_cpu;
@@ -22509,7 +22510,7 @@ rs6000_rtx_costs (rtx x, machine_mode mode, int outer_code,
return false;
}
/* fall through */
-
+
case ASHIFTRT:
case LSHIFTRT:
case ROTATE:
@@ -23082,7 +23083,7 @@ rs6000_emit_swdiv (rtx dst, rtx n, rtx d, bool note_p)
for (i = 0, xprev = x1, eprev = e0; i < passes - 2;
++i, xprev = xnext, eprev = enext) {
-
+
/* enext = eprev * eprev */
enext = gen_reg_rtx (mode);
emit_insn (gen_mul (enext, eprev, eprev));
@@ -23349,7 +23350,7 @@ rs6000_emit_parity (rtx dst, rtx src)
vperm 9,10,11,12
- places the desired result in vr9. However, in LE mode the
+ places the desired result in vr9. However, in LE mode the
vector contents will be
vr10 = 00000003 00000002 00000001 00000000
@@ -23566,7 +23567,7 @@ altivec_expand_vec_perm_const (rtx target, rtx op0, rtx op1,
one_vec = true;
break;
}
-
+
/* Look for splat patterns. */
if (one_vec)
{
@@ -23968,7 +23969,7 @@ rs6000_function_value (const_tree valtype,
int n_elts;
/* Special handling for structs in darwin64. */
- if (TARGET_MACHO
+ if (TARGET_MACHO
&& rs6000_darwin64_struct_check_p (TYPE_MODE (valtype), valtype))
{
CUMULATIVE_ARGS valcum;
@@ -24825,7 +24826,7 @@ rs6000_valid_attribute_p (tree fndecl,
IDENTIFIER_POINTER (tname));
else
fprintf (stderr, "function: unknown\n");
-
+
fprintf (stderr, "args:");
rs6000_debug_target_options (args, " ");
fprintf (stderr, "\n");
@@ -25094,7 +25095,7 @@ static void
rs6000_function_specific_restore (struct gcc_options *opts,
struct gcc_options */* opts_set */,
struct cl_target_option *ptr)
-
+
{
opts->x_rs6000_isa_flags = ptr->x_rs6000_isa_flags;
opts->x_rs6000_isa_flags_explicit = ptr->x_rs6000_isa_flags_explicit;
@@ -26748,7 +26749,7 @@ is_lfs_stfs_insn (rtx_insn *insn)
rtx set = XVECEXP (pattern, 0, 0);
if (GET_CODE (set) != SET)
return false;
-
+
rtx clobber = XVECEXP (pattern, 0, 1);
if (GET_CODE (clobber) != CLOBBER)
return false;
diff --git a/gcc/config/rs6000/si2vmx.h b/gcc/config/rs6000/si2vmx.h
index d0a1a28..fb03bdc 100644
--- a/gcc/config/rs6000/si2vmx.h
+++ b/gcc/config/rs6000/si2vmx.h
@@ -3,7 +3,7 @@
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your option)
+ Software Foundation; either version 3 of the License, or (at your option)
any later version.
This file is distributed in the hope that it will be useful, but WITHOUT
@@ -30,7 +30,7 @@
/* Specify a default halt action for spu_hcmpeq and spu_hcmpgt intrinsics.
- * Users can override the action by defining it prior to including this
+ * Users can override the action by defining it prior to including this
* header file.
*/
#ifndef SPU_HALT_ACTION
@@ -38,7 +38,7 @@
#endif
/* Specify a default stop action for the spu_stop intrinsic.
- * Users can override the action by defining it prior to including this
+ * Users can override the action by defining it prior to including this
* header file.
*/
#ifndef SPU_STOP_ACTION
@@ -47,7 +47,7 @@
/* Specify a default action for unsupported intrinsic.
- * Users can override the action by defining it prior to including this
+ * Users can override the action by defining it prior to including this
* header file.
*/
#ifndef SPU_UNSUPPORTED_ACTION
@@ -55,7 +55,7 @@
#endif
-/* Casting intrinsics - from scalar to quadword
+/* Casting intrinsics - from scalar to quadword
*/
static __inline qword si_from_uchar(unsigned char c) {
@@ -274,7 +274,7 @@ static __inline qword si_absdb(qword a, qword b)
return ((qword)(dc));
}
-/* Add intrinsics
+/* Add intrinsics
*/
#define si_a(_a, _b) ((qword)(vec_add((vec_uint4)(_a), (vec_uint4)(_b))))
@@ -282,14 +282,14 @@ static __inline qword si_absdb(qword a, qword b)
static __inline qword si_ai(qword a, int b)
{
- return ((qword)(vec_add((vec_int4)(a),
+ return ((qword)(vec_add((vec_int4)(a),
vec_splat((vec_int4)(si_from_int(b)), 0))));
}
static __inline qword si_ahi(qword a, short b)
{
- return ((qword)(vec_add((vec_short8)(a),
+ return ((qword)(vec_add((vec_short8)(a),
vec_splat((vec_short8)(si_from_short(b)), 1))));
}
@@ -325,13 +325,13 @@ static __inline qword si_dfa(qword a, qword b)
static __inline qword si_andbi(qword a, signed char b)
{
- return ((qword)(vec_and((vec_char16)(a),
+ return ((qword)(vec_and((vec_char16)(a),
vec_splat((vec_char16)(si_from_char(b)), 3))));
}
static __inline qword si_andhi(qword a, signed short b)
{
- return ((qword)(vec_and((vec_short8)(a),
+ return ((qword)(vec_and((vec_short8)(a),
vec_splat((vec_short8)(si_from_short(b)), 1))));
}
@@ -373,8 +373,8 @@ static __inline qword si_andi(qword a, signed int b)
static __inline qword si_fcmeq(qword a, qword b)
{
vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000});
-
- return ((qword)(vec_cmpeq(vec_andc((vec_float4)(a), msb),
+
+ return ((qword)(vec_cmpeq(vec_andc((vec_float4)(a), msb),
vec_andc((vec_float4)(b), msb))));
}
@@ -408,11 +408,11 @@ static __inline qword si_dfcmeq(qword a, qword b)
biteq = (vec_uint4) vec_cmpeq((vec_uint4)aabs,(vec_uint4)babs);
biteq = vec_and(biteq,(vec_uint4)vec_slo((vec_uchar16)biteq,x.v));
- /*
+ /*
B) Check if a is NaN, store in high word
-
+
B1) If the high word is greater than max_exp (indicates a NaN)
- B2) If the low word is greater than 0
+ B2) If the low word is greater than 0
*/
a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask);
@@ -435,7 +435,7 @@ static __inline qword si_dfcmeq(qword a, qword b)
static __inline qword si_fcmgt(qword a, qword b)
{
vec_float4 msb = (vec_float4)((vec_uint4){0x80000000, 0x80000000, 0x80000000, 0x80000000});
-
+
return ((qword)(vec_cmpgt(vec_andc((vec_float4)(a), msb),
vec_andc((vec_float4)(b), msb))));
}
@@ -454,7 +454,7 @@ static __inline qword si_dfcmgt(qword a, qword b)
/* Shift 4 bytes */
x.i[3] = 4 << 3;
- // absolute value of a,b
+ // absolute value of a,b
vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask);
vec_uint4 babs = vec_and((vec_uint4)b, sign_mask);
@@ -470,7 +470,7 @@ static __inline qword si_dfcmgt(qword a, qword b)
b_nan = vec_or(b_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)b_nan,x.v),b_inf));
b_nan = (vec_uint4)vec_perm((vec_uchar16)b_nan, (vec_uchar16)b_nan, splat_hi);
- // A) Check if the exponents are different
+ // A) Check if the exponents are different
vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aabs,babs);
// B) Check if high word equal, and low word greater
@@ -478,7 +478,7 @@ static __inline qword si_dfcmgt(qword a, qword b)
vec_uint4 eq = (vec_uint4)vec_cmpeq(aabs, babs);
vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v));
- // If either A or B is true, return true (unless NaNs detected)
+ // If either A or B is true, return true (unless NaNs detected)
vec_uint4 r = vec_or(gt_hi, eqgt);
// splat the high words of the comparison step
@@ -513,19 +513,19 @@ static __inline qword si_fceq(qword a, qword b)
static __inline qword si_ceqbi(qword a, signed char b)
{
- return ((qword)(vec_cmpeq((vec_char16)(a),
+ return ((qword)(vec_cmpeq((vec_char16)(a),
vec_splat((vec_char16)(si_from_char(b)), 3))));
}
static __inline qword si_ceqhi(qword a, signed short b)
{
- return ((qword)(vec_cmpeq((vec_short8)(a),
+ return ((qword)(vec_cmpeq((vec_short8)(a),
vec_splat((vec_short8)(si_from_short(b)), 1))));
}
static __inline qword si_ceqi(qword a, signed int b)
{
- return ((qword)(vec_cmpeq((vec_int4)(a),
+ return ((qword)(vec_cmpeq((vec_int4)(a),
vec_splat((vec_int4)(si_from_int(b)), 0))));
}
@@ -560,11 +560,11 @@ static __inline qword si_dfceq(qword a, qword b)
aabs = vec_and((vec_uint4)a,sign_mask);
babs = vec_and((vec_uint4)b,sign_mask);
- /*
+ /*
B) Check if a is NaN, store in high word
-
+
B1) If the high word is greater than max_exp (indicates a NaN)
- B2) If the low word is greater than 0
+ B2) If the low word is greater than 0
*/
a_gt = (vec_uint4)vec_cmpgt(aabs,nan_mask);
@@ -583,7 +583,7 @@ static __inline qword si_dfceq(qword a, qword b)
result = vec_andc(result, anan);
/* Promote high words to 64 bits and return */
- return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote)));
+ return ((qword)(vec_perm((vec_uchar16)result, (vec_uchar16)result, hihi_promote)));
}
@@ -639,7 +639,7 @@ static __inline qword si_dfcgt(qword a, qword b)
/* Shift 4 bytes */
x.i[3] = 4 << 3;
- // absolute value of a,b
+ // absolute value of a,b
vec_uint4 aabs = vec_and((vec_uint4)a, sign_mask);
vec_uint4 babs = vec_and((vec_uint4)b, sign_mask);
@@ -680,7 +680,7 @@ static __inline qword si_dfcgt(qword a, qword b)
// pick the one we want
vec_int4 bval=(vec_int4)vec_sel((vec_uchar16)babs, (vec_uchar16)bneg, (vec_uchar16)bsel);
- // A) Check if the exponents are different
+ // A) Check if the exponents are different
vec_uint4 gt_hi = (vec_uint4)vec_cmpgt(aval,bval);
// B) Check if high word equal, and low word greater
@@ -688,7 +688,7 @@ static __inline qword si_dfcgt(qword a, qword b)
vec_uint4 eq = (vec_uint4)vec_cmpeq(aval, bval);
vec_uint4 eqgt = vec_and(eq,vec_slo(gt_lo,x.v));
- // If either A or B is true, return true (unless NaNs detected)
+ // If either A or B is true, return true (unless NaNs detected)
vec_uint4 r = vec_or(gt_hi, eqgt);
// splat the high words of the comparison step
@@ -700,25 +700,25 @@ static __inline qword si_dfcgt(qword a, qword b)
static __inline qword si_cgtbi(qword a, signed char b)
{
- return ((qword)(vec_cmpgt((vec_char16)(a),
+ return ((qword)(vec_cmpgt((vec_char16)(a),
vec_splat((vec_char16)(si_from_char(b)), 3))));
}
static __inline qword si_cgthi(qword a, signed short b)
{
- return ((qword)(vec_cmpgt((vec_short8)(a),
+ return ((qword)(vec_cmpgt((vec_short8)(a),
vec_splat((vec_short8)(si_from_short(b)), 1))));
}
static __inline qword si_cgti(qword a, signed int b)
{
- return ((qword)(vec_cmpgt((vec_int4)(a),
+ return ((qword)(vec_cmpgt((vec_int4)(a),
vec_splat((vec_int4)(si_from_int(b)), 0))));
}
static __inline qword si_clgtbi(qword a, unsigned char b)
{
- return ((qword)(vec_cmpgt((vec_uchar16)(a),
+ return ((qword)(vec_cmpgt((vec_uchar16)(a),
vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
}
@@ -730,7 +730,7 @@ static __inline qword si_clgthi(qword a, unsigned short b)
static __inline qword si_clgti(qword a, unsigned int b)
{
- return ((qword)(vec_cmpgt((vec_uint4)(a),
+ return ((qword)(vec_cmpgt((vec_uint4)(a),
vec_splat((vec_uint4)(si_from_uint(b)), 0))));
}
@@ -742,7 +742,7 @@ static __inline qword si_dftsv(qword a, char b)
vec_uint4 sign = (vec_uint4)vec_sra((vec_int4)(a), (vec_uint4)vec_splat(((vec_uint4)si_from_int(31)), 0));
sign = (vec_uint4)vec_perm((vec_uchar16)sign,(vec_uchar16)sign,splat_hi);
vec_uint4 aabs = vec_and((vec_uint4)a,sign_mask);
-
+
union {
vec_uchar16 v;
int i[4];
@@ -750,7 +750,7 @@ static __inline qword si_dftsv(qword a, char b)
/* Shift 4 bytes */
x.i[3] = 4 << 3;
-
+
/* Nan or +inf or -inf */
if (b & 0x70)
{
@@ -761,21 +761,21 @@ static __inline qword si_dftsv(qword a, char b)
{
vec_uint4 a_nan = (vec_uint4)vec_cmpgt(aabs, nan_mask);
a_nan = vec_or(a_nan, vec_and((vec_uint4)vec_slo((vec_uchar16)a_nan,x.v),a_inf));
- a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);
+ a_nan = (vec_uint4)vec_perm((vec_uchar16)a_nan, (vec_uchar16)a_nan, splat_hi);
result = vec_or(result, a_nan);
}
- /* inf */
+ /* inf */
if (b & 0x30)
{
a_inf = vec_and((vec_uint4)vec_slo((vec_uchar16)a_inf,x.v), a_inf);
- a_inf = (vec_uint4)vec_perm((vec_uchar16)a_inf, (vec_uchar16)a_inf, splat_hi);
+ a_inf = (vec_uint4)vec_perm((vec_uchar16)a_inf, (vec_uchar16)a_inf, splat_hi);
/* +inf */
if (b & 0x20)
result = vec_or(vec_andc(a_inf, sign), result);
/* -inf */
if (b & 0x10)
result = vec_or(vec_and(a_inf, sign), result);
- }
+ }
}
/* 0 or denorm */
if (b & 0xF)
@@ -860,7 +860,7 @@ static __inline qword si_clz(qword a)
cnt = vec_add(cnt, vec_and(tmp1, vec_cmpeq(cnt, eight)));
cnt = vec_add(cnt, vec_and(tmp2, vec_cmpeq(cnt, sixteen)));
cnt = vec_add(cnt, vec_and(tmp3, vec_cmpeq(cnt, twentyfour)));
-
+
return (qword)((vec_sr((vec_uint4)(cnt), (vec_uint4)(twentyfour))));
}
@@ -901,7 +901,7 @@ static __inline qword si_xsbh(qword a)
vec_char16 av;
av = (vec_char16)(a);
- return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){1, 3, 5, 7, 9,11,13,15,
+ return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){1, 3, 5, 7, 9,11,13,15,
0, 0, 0, 0, 0, 0, 0, 0})))));
}
@@ -910,9 +910,9 @@ static __inline qword si_xshw(qword a)
vec_short8 av;
av = (vec_short8)(a);
- return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){2, 3, 6, 7,
+ return ((qword)(vec_unpackh(vec_perm(av, av, ((vec_uchar16){2, 3, 6, 7,
10,11,14,15,
- 0, 0, 0, 0,
+ 0, 0, 0, 0,
0, 0, 0, 0})))));
}
@@ -921,10 +921,10 @@ static __inline qword si_xswd(qword a)
vec_int4 av;
av = (vec_int4)(a);
- return ((qword)(vec_perm(av, vec_sra(av, ((vec_uint4){31,31,31,31})),
- ((vec_uchar16){20, 21, 22, 23,
- 4, 5, 6, 7,
- 28, 29, 30, 31,
+ return ((qword)(vec_perm(av, vec_sra(av, ((vec_uint4){31,31,31,31})),
+ ((vec_uchar16){20, 21, 22, 23,
+ 4, 5, 6, 7,
+ 28, 29, 30, 31,
12, 13, 14, 15}))));
}
@@ -984,7 +984,7 @@ static __inline qword si_gb(qword a)
}
-/* Compare and halt
+/* Compare and halt
*/
static __inline void si_heq(qword a, qword b)
{
@@ -1066,8 +1066,8 @@ static __inline void si_hlgti(qword a, unsigned int b)
*/
static __inline qword si_mpya(qword a, qword b, qword c)
{
- return ((qword)(vec_msum(vec_and((vec_short8)(a),
- ((vec_short8){0, -1, 0, -1, 0, -1, 0, -1})),
+ return ((qword)(vec_msum(vec_and((vec_short8)(a),
+ ((vec_short8){0, -1, 0, -1, 0, -1, 0, -1})),
(vec_short8)(b), (vec_int4)(c))));
}
@@ -1116,7 +1116,7 @@ static __inline qword si_fsmh(qword a)
in = (vec_uchar16)(a);
mask = (vec_short8)(vec_splat(in, 3));
- return ((qword)(vec_sra(vec_sl(mask, ((vec_ushort8){0, 1, 2, 3, 4, 5, 6, 7})),
+ return ((qword)(vec_sra(vec_sl(mask, ((vec_ushort8){0, 1, 2, 3, 4, 5, 6, 7})),
vec_splat_u16(15))));
}
@@ -1155,7 +1155,7 @@ static __inline qword si_mpyhhau(qword a, qword b, qword c)
*/
static __inline qword si_fms(qword a, qword b, qword c)
{
- return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b),
+ return ((qword)(vec_madd((vec_float4)(a), (vec_float4)(b),
vec_sub(((vec_float4){0.0f}), (vec_float4)(c)))));
}
@@ -1231,13 +1231,13 @@ static __inline qword si_mpyu(qword a, qword b)
static __inline qword si_mpyi(qword a, short b)
{
- return ((qword)(vec_mulo((vec_short8)(a),
+ return ((qword)(vec_mulo((vec_short8)(a),
vec_splat((vec_short8)(si_from_short(b)), 1))));
}
static __inline qword si_mpyui(qword a, unsigned short b)
{
- return ((qword)(vec_mulo((vec_ushort8)(a),
+ return ((qword)(vec_mulo((vec_ushort8)(a),
vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
}
@@ -1313,19 +1313,19 @@ static __inline qword si_or(qword a, qword b)
static __inline qword si_orbi(qword a, unsigned char b)
{
- return ((qword)(vec_or((vec_uchar16)(a),
+ return ((qword)(vec_or((vec_uchar16)(a),
vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
}
static __inline qword si_orhi(qword a, unsigned short b)
{
- return ((qword)(vec_or((vec_ushort8)(a),
+ return ((qword)(vec_or((vec_ushort8)(a),
vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
}
static __inline qword si_ori(qword a, unsigned int b)
{
- return ((qword)(vec_or((vec_uint4)(a),
+ return ((qword)(vec_or((vec_uint4)(a),
vec_splat((vec_uint4)(si_from_uint(b)), 0))));
}
@@ -1384,13 +1384,13 @@ static __inline qword si_rot(qword a, qword b)
static __inline qword si_rothi(qword a, int b)
{
- return ((qword)(vec_rl((vec_ushort8)(a),
+ return ((qword)(vec_rl((vec_ushort8)(a),
vec_splat((vec_ushort8)(si_from_int(b)), 1))));
}
static __inline qword si_roti(qword a, int b)
{
- return ((qword)(vec_rl((vec_uint4)(a),
+ return ((qword)(vec_rl((vec_uint4)(a),
vec_splat((vec_uint4)(si_from_int(b)), 0))));
}
@@ -1526,7 +1526,7 @@ static __inline qword si_rotqbyi(qword a, int count)
vec_uchar16 v;
int i[4];
} left, right;
-
+
count <<= 3;
left.i[3] = count;
right.i[3] = 0 - count;
@@ -1536,7 +1536,7 @@ static __inline qword si_rotqbyi(qword a, int count)
static __inline qword si_rotqby(qword a, qword count)
{
vec_uchar16 left, right;
-
+
left = vec_sl(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(3));
right = vec_sub(vec_splat_u8(0), left);
return ((qword)(vec_or(vec_slo((vec_uchar16)(a), left), vec_sro((vec_uchar16)(a), right))));
@@ -1560,7 +1560,7 @@ static __inline qword si_rotqbii(qword a, int count)
{
vec_uchar16 x, y;
vec_uchar16 result;
-
+
x = vec_splat((vec_uchar16)(si_from_int(count & 7)), 3);
y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))),
(vec_uint4)vec_sub(vec_splat_u8(8), x)));
@@ -1572,11 +1572,11 @@ static __inline qword si_rotqbi(qword a, qword count)
{
vec_uchar16 x, y;
vec_uchar16 result;
-
+
x = vec_and(vec_splat((vec_uchar16)(count), 3), vec_splat_u8(7));
y = (vec_uchar16)(vec_sr((vec_uint4)vec_sro((vec_uchar16)(a), ((vec_uchar16)((vec_uint4){0,0,0,120}))),
(vec_uint4)vec_sub(vec_splat_u8(8), x)));
-
+
result = vec_or(vec_sll((qword)(a), x), y);
return ((qword)(result));
}
@@ -1652,10 +1652,10 @@ static __inline qword si_shufb(qword a, qword b, qword pattern)
{
vec_uchar16 pat;
- pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}),
+ pat = vec_sel(((vec_uchar16){0, 1, 2, 3, 4, 5, 6, 7, 8, 9,10,11,12,13,14,15}),
vec_sr((vec_uchar16)(pattern), vec_splat_u8(3)),
vec_sra((vec_uchar16)(pattern), vec_splat_u8(7)));
- return ((qword)(vec_perm(vec_perm(a, b, pattern),
+ return ((qword)(vec_perm(vec_perm(a, b, pattern),
((vec_uchar16){0, 0, 0, 0, 0, 0, 0, 0,
0xFF, 0xFF, 0xFF, 0xFF, 0x80, 0x80, 0x80, 0x80}),
pat)));
@@ -1831,7 +1831,7 @@ static __inline qword si_sumb(qword a, qword b)
{
vec_uint4 zero = (vec_uint4){0};
vec_ushort8 sum_a, sum_b;
-
+
sum_a = (vec_ushort8)vec_sum4s((vec_uchar16)(a), zero);
sum_b = (vec_ushort8)vec_sum4s((vec_uchar16)(b), zero);
@@ -1848,19 +1848,19 @@ static __inline qword si_xor(qword a, qword b)
static __inline qword si_xorbi(qword a, unsigned char b)
{
- return ((qword)(vec_xor((vec_uchar16)(a),
+ return ((qword)(vec_xor((vec_uchar16)(a),
vec_splat((vec_uchar16)(si_from_uchar(b)), 3))));
}
static __inline qword si_xorhi(qword a, unsigned short b)
{
- return ((qword)(vec_xor((vec_ushort8)(a),
+ return ((qword)(vec_xor((vec_ushort8)(a),
vec_splat((vec_ushort8)(si_from_ushort(b)), 1))));
}
static __inline qword si_xori(qword a, unsigned int b)
{
- return ((qword)(vec_xor((vec_uint4)(a),
+ return ((qword)(vec_xor((vec_uint4)(a),
vec_splat((vec_uint4)(si_from_uint(b)), 0))));
}
@@ -2038,7 +2038,7 @@ static __inline void si_stqr(qword a, unsigned int imm)
static __inline void si_stqx(qword a, qword b, qword c)
{
- vec_st((vec_uchar16)(a),
+ vec_st((vec_uchar16)(a),
si_to_uint((qword)(vec_add((vec_uint4)(b), (vec_uint4)(c)))),
(vector unsigned char *)(0));
}
diff --git a/gcc/config/rs6000/spu2vmx.h b/gcc/config/rs6000/spu2vmx.h
index b2573a9..7550dd9 100644
--- a/gcc/config/rs6000/spu2vmx.h
+++ b/gcc/config/rs6000/spu2vmx.h
@@ -3,7 +3,7 @@
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your option)
+ Software Foundation; either version 3 of the License, or (at your option)
any later version.
This file is distributed in the hope that it will be useful, but WITHOUT
@@ -202,7 +202,7 @@ static __inline vec_int4 spu_and(vec_int4 a, signed int b)
* =======
*/
#define spu_avg(_a, _b) vec_avg(_a, _b)
-
+
/* spu_bisled
* spu_bisled_d
@@ -1070,12 +1070,12 @@ static __inline vec_float4 spu_nand(vec_float4 a, vec_float4 b)
static __inline vec_ullong2 spu_nand(vec_ullong2 a, vec_ullong2 b)
{
- return ((vec_ullong2)(si_nand((qword)(a), (qword)(b))));
+ return ((vec_ullong2)(si_nand((qword)(a), (qword)(b))));
}
static __inline vec_llong2 spu_nand(vec_llong2 a, vec_llong2 b)
{
- return ((vec_llong2)(si_nand((qword)(a), (qword)(b))));
+ return ((vec_llong2)(si_nand((qword)(a), (qword)(b))));
}
static __inline vec_double2 spu_nand(vec_double2 a, vec_double2 b)
@@ -1653,7 +1653,7 @@ static __inline vec_double2 spu_rlmaskqwbytebc(vec_double2 a, int count)
static __inline vec_uchar16 spu_rlqwbyte(vec_uchar16 a, int count)
{
return ((vec_uchar16)(si_rotqby((qword)(a), si_from_int(count))));
-}
+}
static __inline vec_char16 spu_rlqwbyte(vec_char16 a, int count)
{
@@ -1663,7 +1663,7 @@ static __inline vec_char16 spu_rlqwbyte(vec_char16 a, int count)
static __inline vec_ushort8 spu_rlqwbyte(vec_ushort8 a, int count)
{
return ((vec_ushort8)(si_rotqby((qword)(a), si_from_int(count))));
-}
+}
static __inline vec_short8 spu_rlqwbyte(vec_short8 a, int count)
{
@@ -2304,7 +2304,7 @@ static __inline vec_int4 spu_subx(vec_int4 a, vec_int4 b, vec_int4 c)
static __inline vec_ushort8 spu_sumb(vec_uchar16 a, vec_uchar16 b)
{
return ((vec_ushort8)(si_sumb((qword)(a), (qword)(b))));
-}
+}
/* spu_sync
diff --git a/gcc/config/rs6000/vec_types.h b/gcc/config/rs6000/vec_types.h
index 6d629cc..e24dc26 100644
--- a/gcc/config/rs6000/vec_types.h
+++ b/gcc/config/rs6000/vec_types.h
@@ -3,7 +3,7 @@
This file is free software; you can redistribute it and/or modify it under
the terms of the GNU General Public License as published by the Free
- Software Foundation; either version 3 of the License, or (at your option)
+ Software Foundation; either version 3 of the License, or (at your option)
any later version.
This file is distributed in the hope that it will be useful, but WITHOUT
@@ -20,7 +20,7 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-/* Single token vector data types for the PowerPC SIMD/Vector Multi-media
+/* Single token vector data types for the PowerPC SIMD/Vector Multi-media
eXtension */
#ifndef _VEC_TYPES_H_
diff --git a/gcc/config/rs6000/xcoff.h b/gcc/config/rs6000/xcoff.h
index c22edd7..89bd462 100644
--- a/gcc/config/rs6000/xcoff.h
+++ b/gcc/config/rs6000/xcoff.h
@@ -28,11 +28,11 @@
#define OBJECT_FORMAT_COFF
/* Define the magic numbers that we recognize as COFF.
-
+
AIX 4.3 adds U803XTOCMAGIC (0757) for 64-bit objects and AIX V5 adds
U64_TOCMAGIC (0767), but collect2.cc does not include files in the
correct order to conditionally define the symbolic name in this macro.
-
+
The AIX linker accepts import/export files as object files,
so accept "#!" (0x2321) magic number. */
#define MY_ISCOFF(magic) \
@@ -233,7 +233,7 @@
/* This is how we tell the assembler that two symbols have the same value. */
#define SET_ASM_OP "\t.set "
-/* This is how we tell the assembler to equate two values.
+/* This is how we tell the assembler to equate two values.
The semantic of AIX assembler's .set do not correspond to middle-end expectations.
We output aliases as alternative symbols in the front of the definition
via DECLARE_FUNCTION_NAME and DECLARE_OBJECT_NAME.
diff --git a/gcc/config/rtems.h b/gcc/config/rtems.h
index cd5db38..3eca9b5 100644
--- a/gcc/config/rtems.h
+++ b/gcc/config/rtems.h
@@ -1,4 +1,4 @@
-/* Configuration common to all targets running RTEMS.
+/* Configuration common to all targets running RTEMS.
Copyright (C) 2000-2024 Free Software Foundation, Inc.
This file is part of GCC.
diff --git a/gcc/config/rx/rx.cc b/gcc/config/rx/rx.cc
index c84e139..00242e8 100644
--- a/gcc/config/rx/rx.cc
+++ b/gcc/config/rx/rx.cc
@@ -156,7 +156,7 @@ rx_legitimize_address (rtx x,
if (GET_CODE (x) == PLUS
&& GET_CODE (XEXP (x, 0)) == PLUS
- && REG_P (XEXP (XEXP (x, 0), 0))
+ && REG_P (XEXP (XEXP (x, 0), 0))
&& REG_P (XEXP (x, 1)))
return force_reg (SImode, x);
@@ -232,7 +232,7 @@ rx_is_legitimate_address (machine_mode mode, rtx x,
switch (GET_MODE_SIZE (mode))
{
- default:
+ default:
case 4: factor = 4; break;
case 2: factor = 2; break;
case 1: factor = 1; break;
@@ -299,7 +299,7 @@ rx_is_restricted_memory_address (rtx mem, machine_mode mode)
case PLUS:
{
rtx base, index;
-
+
/* Only allow REG+INT addressing. */
base = XEXP (mem, 0);
index = XEXP (mem, 1);
@@ -688,7 +688,7 @@ rx_print_operand (FILE * file, rtx op, int letter)
fprintf (file, "#");
/* Trickery to avoid problems with shifting 32 bits at a time. */
v = v >> 16;
- v = v >> 16;
+ v = v >> 16;
rx_print_integer (file, v);
break;
}
@@ -1002,14 +1002,14 @@ rx_gen_move_template (rtx * operands, bool is_movu)
{
gcc_assert (GET_MODE (src) != DImode);
gcc_assert (GET_MODE (src) != DFmode);
-
+
src_template = "(%A1 - __pid_base)[%P1]";
}
else if (MEM_P (src) && rx_small_data_operand (XEXP (src, 0)))
{
gcc_assert (GET_MODE (src) != DImode);
gcc_assert (GET_MODE (src) != DFmode);
-
+
src_template = "%%gp(%A1)[%G1]";
}
else
@@ -1019,7 +1019,7 @@ rx_gen_move_template (rtx * operands, bool is_movu)
{
gcc_assert (GET_MODE (dest) != DImode);
gcc_assert (GET_MODE (dest) != DFmode);
-
+
dst_template = "%%gp(%A0)[%G0]";
}
else
@@ -1151,7 +1151,7 @@ rx_function_value (const_tree ret_type,
&& ! VECTOR_MODE_P (mode)
)
return gen_rtx_REG (SImode, FUNC_RETURN_REGNUM);
-
+
return gen_rtx_REG (mode, FUNC_RETURN_REGNUM);
}
@@ -1279,7 +1279,7 @@ rx_conditional_register_usage (void)
/* This is for fast interrupt handlers. Any register in
the range r10 to r13 (inclusive) that is currently
- marked as fixed is now a viable, call-used register. */
+ marked as fixed is now a viable, call-used register. */
for (r = 10; r <= 13; r++)
if (fixed_regs[r])
{
@@ -1363,7 +1363,7 @@ rx_set_current_function (tree fndecl)
current_is_fast_interrupt
= fndecl ? is_fast_interrupt_func (fndecl) : false;
-
+
if (prev_was_fast_interrupt != current_is_fast_interrupt)
{
use_fixed_regs = current_is_fast_interrupt;
@@ -1790,7 +1790,7 @@ rx_expand_prologue (void)
break;
}
}
-
+
/* We have assumed that there are at least two registers pushed... */
gcc_assert (acc_high != 0);
@@ -1939,7 +1939,7 @@ rx_emit_stack_popm (rtx * operands, bool is_popm)
gcc_assert (CONST_INT_P (operands[0]));
stack_adjust = INTVAL (operands[0]);
-
+
gcc_assert (GET_CODE (operands[1]) == PARALLEL);
last_reg = XVECLEN (operands[1], 0) - (is_popm ? 2 : 3);
@@ -1987,13 +1987,13 @@ gen_rx_rtsd_vector (unsigned int adjust, unsigned int low, unsigned int high)
return vector;
}
-
+
/* Generate a PARALLEL which will satisfy the rx_load_multiple_vector predicate. */
static rtx
gen_rx_popm_vector (unsigned int low, unsigned int high)
{
- unsigned int i;
+ unsigned int i;
unsigned int count = (high - low) + 2;
rtx vector;
@@ -2877,7 +2877,7 @@ rx_func_attr_inlinable (const_tree decl)
{
return ! is_fast_interrupt_func (decl)
&& ! is_interrupt_func (decl)
- && ! is_naked_func (decl);
+ && ! is_naked_func (decl);
}
static bool
@@ -2961,7 +2961,7 @@ rx_is_legitimate_constant (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
gcc_unreachable ();
}
break;
-
+
case LABEL_REF:
case SYMBOL_REF:
return true;
@@ -3001,7 +3001,7 @@ rx_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
&& ((INTVAL (b) > 128) || INTVAL (b) < -127))
/* Try to discourage REG + <large OFF> when optimizing for size. */
return COSTS_N_INSNS (2);
-
+
return COSTS_N_INSNS (1);
}
@@ -3421,7 +3421,7 @@ rx_adjust_insn_length (rtx_insn *insn, int current_length)
zero = false;
factor = 2;
break;
-
+
case CODE_FOR_plussi3_zero_extendqi:
case CODE_FOR_andsi3_zero_extendqi:
case CODE_FOR_iorsi3_zero_extendqi:
@@ -3436,7 +3436,7 @@ rx_adjust_insn_length (rtx_insn *insn, int current_length)
zero = true;
factor = 1;
break;
-
+
case CODE_FOR_plussi3_sign_extendqi:
case CODE_FOR_andsi3_sign_extendqi:
case CODE_FOR_iorsi3_sign_extendqi:
@@ -3451,7 +3451,7 @@ rx_adjust_insn_length (rtx_insn *insn, int current_length)
zero = false;
factor = 1;
break;
- }
+ }
/* We are expecting: (SET (REG) (<OP> (REG) (<EXTEND> (MEM)))). */
extend = single_set (insn);
@@ -3466,7 +3466,7 @@ rx_adjust_insn_length (rtx_insn *insn, int current_length)
gcc_assert ((zero && (GET_CODE (extend) == ZERO_EXTEND))
|| (! zero && (GET_CODE (extend) == SIGN_EXTEND)));
-
+
mem = XEXP (extend, 0);
gcc_checking_assert (MEM_P (mem));
if (REG_P (XEXP (mem, 0)))
diff --git a/gcc/config/s390/s390-c.cc b/gcc/config/s390/s390-c.cc
index 4521a86..0332fb4 100644
--- a/gcc/config/s390/s390-c.cc
+++ b/gcc/config/s390/s390-c.cc
@@ -29,6 +29,7 @@
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index b4646cc..e7ac59d 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -50,7 +50,6 @@ extern void s390_set_has_landing_pad_p (bool);
extern bool s390_hard_regno_rename_ok (unsigned int, unsigned int);
extern int s390_class_max_nregs (enum reg_class, machine_mode);
extern bool s390_return_addr_from_memory(void);
-extern rtx s390_gen_lowpart_subreg (machine_mode, rtx);
extern bool s390_fma_allowed_p (machine_mode);
#if S390_USE_TARGET_ATTRIBUTE
extern tree s390_valid_target_attribute_tree (tree args,
diff --git a/gcc/config/s390/s390.cc b/gcc/config/s390/s390.cc
index 47e1d5a..874b112 100644
--- a/gcc/config/s390/s390.cc
+++ b/gcc/config/s390/s390.cc
@@ -22,6 +22,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -516,31 +517,6 @@ s390_return_addr_from_memory ()
return cfun_gpr_save_slot(RETURN_REGNUM) == SAVE_SLOT_STACK;
}
-/* Generate a SUBREG for the MODE lowpart of EXPR.
-
- In contrast to gen_lowpart it will always return a SUBREG
- expression. This is useful to generate STRICT_LOW_PART
- expressions. */
-rtx
-s390_gen_lowpart_subreg (machine_mode mode, rtx expr)
-{
- rtx lowpart = gen_lowpart (mode, expr);
-
- /* There might be no SUBREG in case it could be applied to the hard
- REG rtx or it could be folded with a paradoxical subreg. Bring
- it back. */
- if (!SUBREG_P (lowpart))
- {
- machine_mode reg_mode = TARGET_ZARCH ? DImode : SImode;
- gcc_assert (REG_P (lowpart));
- lowpart = gen_lowpart_SUBREG (mode,
- gen_rtx_REG (reg_mode,
- REGNO (lowpart)));
- }
-
- return lowpart;
-}
-
/* Return nonzero if it's OK to use fused multiply-add for MODE. */
bool
s390_fma_allowed_p (machine_mode mode)
@@ -3714,6 +3690,18 @@ s390_mem_constraint (const char *str, rtx op)
if ((reload_completed || reload_in_progress)
? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
return 0;
+ /* offsettable_memref_p ensures only that any positive offset added to
+ the address forms a valid general address. For AQ and AR constraints
+ we also have to verify that the resulting displacement after adding
+ any positive offset less than the size of the object being referenced
+ is still valid. */
+ if (str[1] == 'Q' || str[1] == 'R')
+ {
+ int o = GET_MODE_SIZE (GET_MODE (op)) - 1;
+ rtx tmp = adjust_address (op, QImode, o);
+ if (!s390_check_qrst_address (str[1], XEXP (tmp, 0), true))
+ return 0;
+ }
return s390_check_qrst_address (str[1], XEXP (op, 0), true);
case 'B':
/* Check for non-literal-pool variants of memory constraints. */
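
(Editorial aside on the hunk above, not part of the patch: the new AQ/AR test requires that the *last* byte of the referenced object, not just its first, is still reachable with a short displacement; the real check goes through adjust_address and s390_check_qrst_address. A rough standalone sketch of the bound being enforced, assuming the usual unsigned 12-bit short-displacement range 0..4095 — the constant here is illustrative:

    /* Every byte of an object of SIZE bytes placed at displacement DISP
       must stay within the short-displacement range.  */
    static int
    whole_object_short_disp_ok (long disp, long size)
    {
      return disp >= 0 && disp + size - 1 <= 4095;
    }
)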
@@ -7112,15 +7100,21 @@ s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
/* Emit a strict_low_part pattern if possible. */
if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
{
- rtx low_dest = s390_gen_lowpart_subreg (smode, dest);
- rtx low_src = gen_lowpart (smode, src);
-
- switch (smode)
+ rtx low_dest = gen_lowpart (smode, dest);
+ if (SUBREG_P (low_dest) && !paradoxical_subreg_p (low_dest))
{
- case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src)); return true;
- case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src)); return true;
- case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src)); return true;
- default: break;
+ poly_int64 offset = GET_MODE_SIZE (mode) - GET_MODE_SIZE (smode);
+ rtx low_src = adjust_address (src, smode, offset);
+ switch (smode)
+ {
+ case E_QImode: emit_insn (gen_movstrictqi (low_dest, low_src));
+ return true;
+ case E_HImode: emit_insn (gen_movstricthi (low_dest, low_src));
+ return true;
+ case E_SImode: emit_insn (gen_movstrictsi (low_dest, low_src));
+ return true;
+ default: break;
+ }
}
}
@@ -8607,7 +8601,6 @@ print_operand_address (FILE *file, rtx addr)
't': CONST_INT: "start" of contiguous bitmask X in SImode.
'x': print integer X as if it's an unsigned halfword.
'v': print register number as vector register (v1 instead of f1).
- 'V': print the second word of a TFmode operand as vector register.
*/
void
@@ -8861,13 +8854,13 @@ print_operand (FILE *file, rtx x, int code)
case REG:
/* Print FP regs as fx instead of vx when they are accessed
through non-vector mode. */
- if ((code == 'v' || code == 'V')
+ if (code == 'v'
|| VECTOR_NOFP_REG_P (x)
|| (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
|| (VECTOR_REG_P (x)
&& (GET_MODE_SIZE (GET_MODE (x)) /
s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
- fprintf (file, "%%v%s", reg_names[REGNO (x) + (code == 'V')] + 2);
+ fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
else
fprintf (file, "%s", reg_names[REGNO (x)]);
break;
@@ -11350,13 +11343,6 @@ s390_can_change_mode_class (machine_mode from_mode,
return true;
}
-/* Return true if we use LRA instead of reload pass. */
-static bool
-s390_lra_p (void)
-{
- return s390_lra_flag;
-}
-
/* Return true if register FROM can be eliminated via register TO. */
static bool
@@ -18452,9 +18438,6 @@ s390_c_mode_for_floating_type (enum tree_index ti)
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
-#undef TARGET_LRA_P
-#define TARGET_LRA_P s390_lra_p
-
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index 3d5759d..4a225ae 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -241,6 +241,8 @@
UNSPEC_VEC_VFMIN
UNSPEC_VEC_VFMAX
+ UNSPEC_TF_TO_FPRX2
+
UNSPEC_NNPA_VCLFNHS_V8HI
UNSPEC_NNPA_VCLFNLS_V8HI
UNSPEC_NNPA_VCRNFS_V8HI
@@ -1974,12 +1976,11 @@
"TARGET_ZARCH"
"#"
"&& reload_completed"
- [(set (match_dup 2) (match_dup 4))
+ [(set (match_dup 2) (match_dup 3))
(set (match_dup 0) (ashift:DI (match_dup 0) (const_int 32)))
- (set (strict_low_part (match_dup 3)) (match_dup 5))]
+ (set (strict_low_part (match_dup 2)) (match_dup 4))]
"operands[2] = gen_lowpart (SImode, operands[0]);
- operands[3] = s390_gen_lowpart_subreg (SImode, operands[0]);
- s390_split_access_reg (operands[1], &operands[5], &operands[4]);")
+ s390_split_access_reg (operands[1], &operands[4], &operands[3]);")
; Splitters for storing TLS pointer to %a0:DI.
@@ -5068,7 +5069,7 @@
(parallel
[(set (strict_low_part (match_dup 2)) (match_dup 1))
(clobber (reg:CC CC_REGNUM))])]
- "operands[2] = s390_gen_lowpart_subreg (HImode, operands[0]);")
+ "operands[2] = gen_lowpart (HImode, operands[0]);")
(define_insn_and_split "*zero_extendqisi2_31"
[(set (match_operand:SI 0 "register_operand" "=&d")
@@ -5078,7 +5079,7 @@
"&& reload_completed"
[(set (match_dup 0) (const_int 0))
(set (strict_low_part (match_dup 2)) (match_dup 1))]
- "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);")
+ "operands[2] = gen_lowpart (QImode, operands[0]);")
;
; zero_extendqihi2 instruction pattern(s).
@@ -5110,7 +5111,7 @@
"&& reload_completed"
[(set (match_dup 0) (const_int 0))
(set (strict_low_part (match_dup 2)) (match_dup 1))]
- "operands[2] = s390_gen_lowpart_subreg (QImode, operands[0]);")
+ "operands[2] = gen_lowpart (QImode, operands[0]);")
;
; fixuns_trunc(dd|td|sf|df|tf)(si|di)2 expander
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
index a5b5aa9..23ea4b8 100644
--- a/gcc/config/s390/s390.opt
+++ b/gcc/config/s390/s390.opt
@@ -229,10 +229,6 @@ Set the branch costs for conditional branch instructions. Reasonable
values are small, non-negative integers. The default branch cost is
1.
-mlra
-Target Var(s390_lra_flag) Init(1) Save
-Use LRA instead of reload.
-
mpic-data-is-text-relative
Target Var(s390_pic_data_is_text_relative) Init(TARGET_DEFAULT_PIC_DATA_IS_TEXT_RELATIVE)
Assume data segments are relative to text segment.
diff --git a/gcc/config/s390/s390.opt.urls b/gcc/config/s390/s390.opt.urls
index ab1e761..bc772d2 100644
--- a/gcc/config/s390/s390.opt.urls
+++ b/gcc/config/s390/s390.opt.urls
@@ -74,8 +74,6 @@ UrlSuffix(gcc/S_002f390-and-zSeries-Options.html#index-mzarch)
; skipping UrlSuffix for 'mbranch-cost=' due to finding no URLs
-; skipping UrlSuffix for 'mlra' due to finding no URLs
-
; skipping UrlSuffix for 'mpic-data-is-text-relative' due to finding no URLs
; skipping UrlSuffix for 'mindirect-branch=' due to finding no URLs
diff --git a/gcc/config/s390/vector.md b/gcc/config/s390/vector.md
index a75b7cb..e6f83d0 100644
--- a/gcc/config/s390/vector.md
+++ b/gcc/config/s390/vector.md
@@ -907,36 +907,45 @@
"vmrlg\t%0,%1,%2";
[(set_attr "op_type" "VRR")])
-
-(define_insn "*tf_to_fprx2_0"
- [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "+f") 0)
- (subreg:DF (match_operand:TF 1 "general_operand" "v") 0))]
- "TARGET_VXE"
- ; M4 == 1 corresponds to %v0[0] = %v1[0]; %v0[1] = %v0[1];
- "vpdi\t%v0,%v1,%v0,1"
- [(set_attr "op_type" "VRR")])
-
-(define_insn "*tf_to_fprx2_1"
- [(set (subreg:DF (match_operand:FPRX2 0 "nonimmediate_operand" "+f") 8)
- (subreg:DF (match_operand:TF 1 "general_operand" "v") 8))]
+(define_insn "tf_to_fprx2"
+ [(set (match_operand:FPRX2 0 "register_operand" "=f,f ,f")
+ (unspec:FPRX2 [(match_operand:TF 1 "general_operand" "v,AR,AT")]
+ UNSPEC_TF_TO_FPRX2))]
"TARGET_VXE"
- ; M4 == 5 corresponds to %V0[0] = %v1[1]; %V0[1] = %V0[1];
- "vpdi\t%V0,%v1,%V0,5"
- [(set_attr "op_type" "VRR")])
-
-(define_insn_and_split "tf_to_fprx2"
- [(set (match_operand:FPRX2 0 "nonimmediate_operand" "=f,f")
- (subreg:FPRX2 (match_operand:TF 1 "general_operand" "v,AR") 0))]
- "TARGET_VXE"
- "#"
- "!(MEM_P (operands[1]) && MEM_VOLATILE_P (operands[1]))"
- [(set (match_dup 2) (match_dup 3))
- (set (match_dup 4) (match_dup 5))]
{
- operands[2] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 0);
- operands[3] = simplify_gen_subreg (DFmode, operands[1], TFmode, 0);
- operands[4] = simplify_gen_subreg (DFmode, operands[0], FPRX2mode, 8);
- operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, 8);
+ char buf[64];
+ const char *reg_pair = reg_names[REGNO (operands[0]) + 1];
+ switch (which_alternative)
+ {
+ case 0:
+ if (REGNO (operands[0]) == REGNO (operands[1]))
+ {
+ reg_pair += 2; // get rid of prefix %f
+ snprintf (buf, sizeof (buf), "vpdi\t%%%%v%s,%%v1,%%%%v%s,5", reg_pair, reg_pair);
+ output_asm_insn (buf, operands);
+ return "";
+ }
+ else
+ {
+ reg_pair += 2; // get rid of prefix %f
+ snprintf (buf, sizeof (buf), "ldr\t%%f0,%%f1;vpdi\t%%%%v%s,%%v1,%%%%v%s,5", reg_pair, reg_pair);
+ output_asm_insn (buf, operands);
+ return "";
+ }
+ case 1:
+ {
+ snprintf (buf, sizeof (buf), "ld\t%%f0,%%1;ld\t%%%s,8+%%1", reg_pair);
+ output_asm_insn (buf, operands);
+ return "";
+ }
+ case 2:
+ {
+ snprintf (buf, sizeof (buf), "ldy\t%%f0,%%1;ldy\t%%%s,8+%%1", reg_pair);
+ output_asm_insn (buf, operands);
+ return "";
+ }
+ default: gcc_unreachable ();
+ }
})
;; VECTOR REVERSE ELEMENTS V16QI
@@ -2830,9 +2839,8 @@
; There is no instruction for rounding an extended BFP operand in a VR into
; a signed integer, therefore copy it into a FPR pair first.
(define_expand "fix_trunctf<mode>2_vr"
- [(set (subreg:DF (match_dup 2) 0)
- (subreg:DF (match_operand:TF 1 "register_operand" "") 0))
- (set (subreg:DF (match_dup 2) 8) (subreg:DF (match_dup 1) 8))
+ [(set (match_dup 2)
+ (unspec:FPRX2 [(match_operand:TF 1 "register_operand")] UNSPEC_TF_TO_FPRX2))
(parallel [(set (match_operand:GPR 0 "register_operand" "")
(fix:GPR (match_dup 2)))
(unspec:GPR [(const_int BFP_RND_TOWARD_0)] UNSPEC_ROUND)
@@ -2863,9 +2871,8 @@
; There is no instruction for rounding an extended BFP operand in a VR into
; an unsigned integer, therefore copy it into a FPR pair first.
(define_expand "fixuns_trunctf<mode>2_vr"
- [(set (subreg:DF (match_dup 2) 0)
- (subreg:DF (match_operand:TF 1 "register_operand" "") 0))
- (set (subreg:DF (match_dup 2) 8) (subreg:DF (match_dup 1) 8))
+ [(set (match_dup 2)
+ (unspec:FPRX2 [(match_operand:TF 1 "register_operand")] UNSPEC_TF_TO_FPRX2))
(parallel [(set (match_operand:GPR 0 "register_operand" "")
(unsigned_fix:GPR (match_dup 2)))
(unspec:GPR [(const_int BFP_RND_TOWARD_0)] UNSPEC_ROUND)
diff --git a/gcc/config/sh/elf.h b/gcc/config/sh/elf.h
index 505c5d6..33a6906 100644
--- a/gcc/config/sh/elf.h
+++ b/gcc/config/sh/elf.h
@@ -33,7 +33,7 @@ along with GCC; see the file COPYING3. If not see
#undef WCHAR_TYPE
#define WCHAR_TYPE SH_ELF_WCHAR_TYPE
-
+
#undef WCHAR_TYPE_SIZE
#define WCHAR_TYPE_SIZE 32
diff --git a/gcc/config/sh/embed-elf.h b/gcc/config/sh/embed-elf.h
index fef16de..844aff9 100644
--- a/gcc/config/sh/embed-elf.h
+++ b/gcc/config/sh/embed-elf.h
@@ -1,4 +1,4 @@
-/* Definitions of target machine for GNU compiler for Renesas / SuperH SH
+/* Definitions of target machine for GNU compiler for Renesas / SuperH SH
non-Linux embedded targets.
Copyright (C) 2002-2024 Free Software Foundation, Inc.
Contributed by J"orn Rennecke <joern.rennecke@superh.com>
diff --git a/gcc/config/sh/netbsd-elf.h b/gcc/config/sh/netbsd-elf.h
index f195710..b937155 100644
--- a/gcc/config/sh/netbsd-elf.h
+++ b/gcc/config/sh/netbsd-elf.h
@@ -62,7 +62,7 @@ along with GCC; see the file COPYING3. If not see
/* Define because we use the label and we do not need them. */
#define NO_PROFILE_COUNTERS 1
-
+
#undef FUNCTION_PROFILER
#define FUNCTION_PROFILER(STREAM,LABELNO) \
do \
diff --git a/gcc/config/sh/sh.cc b/gcc/config/sh/sh.cc
index 7391b8d..663c9908 100644
--- a/gcc/config/sh/sh.cc
+++ b/gcc/config/sh/sh.cc
@@ -377,7 +377,7 @@ TARGET_GNU_ATTRIBUTES (sh_attribute_table,
#define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
-
+
#undef TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
@@ -814,7 +814,7 @@ register_sh_passes (void)
PASS_POS_INSERT_BEFORE, "sched2", 1);
}
-/* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
+/* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
various options, and do some machine dependent initialization. */
static void
sh_option_override (void)
@@ -1012,7 +1012,7 @@ sh_override_options_after_change (void)
fetched as a pair from a longword boundary. For size use 16 bit
alignment to get more compact code.
Aligning all jumps increases the code size, even if it might
- result in slightly faster code. Thus, it is set to the smallest
+ result in slightly faster code. Thus, it is set to the smallest
alignment possible if not specified by the user. */
if (flag_align_loops && !str_align_loops)
str_align_loops = optimize_size ? "2" : "4";
@@ -2265,7 +2265,7 @@ sh_eval_treg_value (rtx op)
t = 1;
else
return -1;
-
+
return t ^ (cmpval == cmpop);
}
@@ -2543,7 +2543,7 @@ output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
We punt for now, since this is likely very rare. */
gcc_assert (!REG_P (XEXP (inside, 1)));
break;
-
+
case LABEL_REF:
return "mov.l %1,%0" "\n"
" mov.l %1+4,%T0";
@@ -3016,7 +3016,7 @@ bool
sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
{
gcc_assert (CONST_INT_P (shift_amount));
-
+
const int shift_amount_i = INTVAL (shift_amount) & 31;
/* Special case for shift count of 31: use and-rotl sequence. */
@@ -3036,7 +3036,7 @@ sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
/* For right shifts the constant might be negative. */
const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
-
+
/* Special case for shift count of 31: use shll-movt sequence. */
if (shift_amount_i == 31)
return true;
@@ -3046,7 +3046,7 @@ sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
}
/* Return true if it is potentially beneficial to use a dynamic shift
- instruction (shad / shar) instead of a combination of 1/2/8/16
+ instruction (shad / shar) instead of a combination of 1/2/8/16
shift instructions for the specified shift count.
If dynamic shifts are not available, always return false. */
bool
@@ -3231,7 +3231,7 @@ multcosts (rtx x ATTRIBUTE_UNUSED)
static bool
sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
int opno ATTRIBUTE_UNUSED,
- int *total, bool speed ATTRIBUTE_UNUSED)
+ int *total, bool speed)
{
int code = GET_CODE (x);
@@ -3240,7 +3240,7 @@ sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
/* The lower-subreg pass decides whether to split multi-word regs
into individual regs by looking at the cost for a SET of certain
modes with the following patterns:
- (set (reg) (reg))
+ (set (reg) (reg))
(set (reg) (const_int 0))
On machines that support vector-move operations a multi-word move
is the same cost as individual reg move. On SH there is no
@@ -3264,10 +3264,12 @@ sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
}
return false;
- /* The cost of a mem access is mainly the cost of the address mode. */
+ /* The cost of a mem access is mainly the cost of the address mode on top
+ of the cost of the load/store insn itself. */
case MEM:
*total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
- true);
+ speed)
+ + COSTS_N_INSNS (1);
return true;
case IF_THEN_ELSE:
@@ -3317,7 +3319,8 @@ sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
{
*total = sh_address_cost (XEXP (XEXP (x, 0), 0),
GET_MODE (XEXP (x, 0)),
- MEM_ADDR_SPACE (XEXP (x, 0)), true);
+ MEM_ADDR_SPACE (XEXP (x, 0)), speed)
+ + COSTS_N_INSNS (1);
return true;
}
return false;
@@ -3333,9 +3336,10 @@ sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
|| GET_MODE (XEXP (x, 0)) == HImode))
{
/* Handle SH2A's movu.b and movu.w insn. */
- *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
- GET_MODE (XEXP (x, 0)),
- MEM_ADDR_SPACE (XEXP (x, 0)), true);
+ *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
+ GET_MODE (XEXP (x, 0)),
+ MEM_ADDR_SPACE (XEXP (x, 0)), speed)
+ + COSTS_N_INSNS (1);
return true;
}
return false;
@@ -3348,16 +3352,18 @@ sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
rtx xx = XVECEXP (x, 0, i);
if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
{
- *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
+ *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
GET_MODE (XEXP (xx, 0)),
- MEM_ADDR_SPACE (XEXP (xx, 0)), true);
+ MEM_ADDR_SPACE (XEXP (xx, 0)), speed)
+ + COSTS_N_INSNS (1);
return true;
}
if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
{
*total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
GET_MODE (XEXP (xx, 1)),
- MEM_ADDR_SPACE (XEXP (xx, 1)), true);
+ MEM_ADDR_SPACE (XEXP (xx, 1)), speed)
+ + COSTS_N_INSNS (1);
return true;
}
}
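
(Editorial aside, not part of the patch: the cost hunks above all apply the same two-part change — the caller's speed flag is passed through instead of a hard-coded true, and the total for a memory access becomes the addressing-mode cost plus one instruction for the load/store itself. As a toy formula, using GCC's usual COSTS_N_INSNS scaling of 4 units per insn:

    /* Illustrative only: total rtx cost of a load or store under the
       revised model.  */
    static int
    mem_cost_sketch (int address_mode_cost)
    {
      const int one_insn = 4;               /* COSTS_N_INSNS (1)            */
      return address_mode_cost + one_insn;  /* address cost + the insn itself */
    }
)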
@@ -3575,7 +3581,7 @@ sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
const int mode_sz = GET_MODE_SIZE (mode);
int r = 15 * mov_insn_sz * disp_scale;
-
+
/* If the mov insn will be split into multiple loads/stores, the
maximum possible displacement is a bit smaller. */
if (mode_sz > mov_insn_sz)
@@ -3645,7 +3651,7 @@ sh_address_cost (rtx x, machine_mode mode,
return 3;
}
- /* 'reg + reg' addressing. Account a slightly higher cost because of
+ /* 'reg + reg' addressing. Account a slightly higher cost because of
increased pressure on R0. */
if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
return 3;
@@ -5225,7 +5231,7 @@ find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
from = PREV_INSN (from);
/* Don't emit a constant table int the middle of global pointer setting,
- since that that would move the addressing base GOT into another table.
+ since that that would move the addressing base GOT into another table.
We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
in the pool anyway, so just move up the whole constant pool.
@@ -6059,7 +6065,7 @@ sh_reorg (void)
later insn. */
/* ??? We shouldn't have to use FOUNDINSN here.
- This dates back to when we used LOG_LINKS to find
+ This dates back to when we used LOG_LINKS to find
the most recent insn which sets the register. */
if (foundinsn
@@ -6759,7 +6765,7 @@ output_stack_adjust (int size, rtx reg, int epilogue_p,
if (temp < 0)
{
rtx adj_reg, tmp_reg, mem;
-
+
/* If we reached here, the most likely case is the (sibcall)
epilogue. Put a special push/pop sequence for such case as
the last resort. This looks lengthy but would not be problem
@@ -6770,7 +6776,7 @@ output_stack_adjust (int size, rtx reg, int epilogue_p,
r5 have been reserved as fixed registers or assigned
as global registers, and they change during an
interrupt. There are possible ways to handle this:
-
+
- If we are adjusting the frame pointer (r14), we can do
with a single temp register and an ordinary push / pop
on the stack.
@@ -7268,7 +7274,7 @@ sh_expand_epilogue (bool sibcall_p)
/* For an ISR with RESBANK attribute assigned, don't pop PR
register. */
if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
- && !sh_cfun_resbank_handler_p ())
+ && !sh_cfun_resbank_handler_p ())
{
if (!frame_pointer_needed)
emit_insn (gen_blockage ());
@@ -7328,7 +7334,7 @@ sh_expand_epilogue (bool sibcall_p)
fpscr_deferred = true;
/* For an ISR with RESBANK attribute assigned, don't pop
following registers, R0-R14, MACH, MACL and GBR. */
- else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
+ else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
&& ! (sh_cfun_resbank_handler_p ()
&& ((j >= FIRST_GENERAL_REG
&& j < LAST_GENERAL_REG)
@@ -9189,7 +9195,7 @@ legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
In some cases it is possible that a requested offset might seem unaligned
or inappropriate for the mode size, like offset = 2 and mode size = 4.
This is compensated by adjusting the base address so that the effective
- address of the displacement move insn will be aligned.
+ address of the displacement move insn will be aligned.
This is not the best possible way of rebasing the base address, as it
does not look at other present displacement addressings around it.
@@ -10405,7 +10411,7 @@ sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
bool
sh_frame_pointer_required (void)
{
-/* If needed override this in other tm.h files to cope with various OS
+/* If needed override this in other tm.h files to cope with various OS
lossage requiring a frame pointer. */
if (SUBTARGET_FRAME_POINTER_REQUIRED)
return true;
@@ -11393,14 +11399,14 @@ sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
<= sh_max_mov_insn_displacement (mode, false))
return R0_REGS;
- /* When reload is trying to address a QImode or HImode subreg on the stack,
+ /* When reload is trying to address a QImode or HImode subreg on the stack,
force any subreg byte into R0_REGS, as this is going to become a
displacement address.
We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
is on the stack, the memref to it might already require a displacement
and that has to be added to the final address. At this point we don't
know the cumulative displacement so we assume the worst case. */
- if ((mode == QImode || mode == HImode) && rclass != R0_REGS
+ if ((mode == QImode || mode == HImode) && rclass != R0_REGS
&& GET_CODE (x) == SUBREG && true_regnum (x) == -1)
return R0_REGS;
@@ -11439,7 +11445,7 @@ sh_legitimize_address_displacement (rtx *offset1, rtx *offset2,
*offset2 = adj.mov_disp;
return true;
}
-
+
return false;
}
@@ -11589,7 +11595,7 @@ base_reg_disp::base_reg_disp (rtx br, disp_t d)
: reg_ (br), disp_ (d)
{
}
-
+
inline bool
base_reg_disp::is_reg (void) const
{
@@ -11934,7 +11940,7 @@ sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
op_is_t_count++;
}
}
-
+
return op_is_t_count == 2;
}
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index 53cad85..e0ac35c 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -293,7 +293,7 @@ extern int code_for_indirect_jump_scratch;
#else
#define IS_LITTLE_ENDIAN_OPTION "%{!mb:"
#endif
-
+
#if TARGET_CPU_DEFAULT & MASK_HARD_SH2A
#define UNSUPPORTED_SH2A IS_LITTLE_ENDIAN_OPTION \
"%{m2a*|!m1:%{!m2*:%{!m3*:%{!m4*:%eSH2a does not support little-endian}}}}}"
@@ -1490,7 +1490,7 @@ extern bool current_function_interrupt;
return X << (Y & 31);
else
return X >> (-Y) & 31);
-
+
The dynamic shift library routines in lib1funcs.S do not use the sign bit
like the hardware dynamic shifts and truncate the shift count to 31.
We define SHIFT_COUNT_TRUNCATED to 0 and express the implied shift count
diff --git a/gcc/config/sh/sh_treg_combine.cc b/gcc/config/sh/sh_treg_combine.cc
index a26fcfb..db40573 100644
--- a/gcc/config/sh/sh_treg_combine.cc
+++ b/gcc/config/sh/sh_treg_combine.cc
@@ -634,7 +634,7 @@ sh_treg_combine::sh_treg_combine (gcc::context* ctx, bool split_insns,
m_split_insns (split_insns),
m_ccreg (NULL_RTX)
{
- // Overwrite default name in pass_data base class.
+ // Overwrite default name in pass_data base class.
this->name = name;
}
diff --git a/gcc/config/sh/vxworks.h b/gcc/config/sh/vxworks.h
index 7a07ce9..15786b2 100644
--- a/gcc/config/sh/vxworks.h
+++ b/gcc/config/sh/vxworks.h
@@ -1,8 +1,8 @@
/* Definitions of target machine for GCC,
- for SuperH with targeting the VXWorks run time environment.
+ for SuperH with targeting the VXWorks run time environment.
Copyright (C) 2003-2024 Free Software Foundation, Inc.
Contributed by CodeSourcery, LLC.
-
+
This file is part of GCC.
GCC is free software; you can redistribute it and/or modify
diff --git a/gcc/config/sol2-cxx.cc b/gcc/config/sol2-cxx.cc
index aa558be..4f5fbc6 100644
--- a/gcc/config/sol2-cxx.cc
+++ b/gcc/config/sol2-cxx.cc
@@ -17,6 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/sol2.cc b/gcc/config/sol2.cc
index 4efa693..1cc525a 100644
--- a/gcc/config/sol2.cc
+++ b/gcc/config/sol2.cc
@@ -226,7 +226,7 @@ solaris_elf_asm_comdat_section (const char *name, unsigned int flags, tree decl)
directive since Sun as treats undeclared sections as @progbits,
which conflicts with .bss* sections which are @nobits. */
targetm.asm_out.named_section (section, flags & ~SECTION_LINKONCE, decl);
-
+
/* Sun as separates declaration of a group section and of the group
itself, using the .group directive and the #comdat flag. */
fprintf (asm_out_file, "\t.group\t%s," SECTION_NAME_FORMAT ",#comdat\n",
diff --git a/gcc/config/sparc/constraints.md b/gcc/config/sparc/constraints.md
index 350ad8e..6cb7a30 100644
--- a/gcc/config/sparc/constraints.md
+++ b/gcc/config/sparc/constraints.md
@@ -145,51 +145,6 @@
(match_test "TARGET_ARCH32")
(match_test "memory_ok_for_ldd (op)")))
-;; This awkward register constraint is necessary because it is not
-;; possible to express the "must be even numbered register" condition
-;; using register classes. The problem is that membership in a
-;; register class requires that all registers of a multi-regno
-;; register be included in the set. It is add_to_hard_reg_set
-;; and in_hard_reg_set_p which populate and test regsets with these
-;; semantics.
-;;
-;; So this means that we would have to put both the even and odd
-;; register into the register class, which would not restrict things
-;; at all.
-;;
-;; Using a combination of GENERAL_REGS and TARGET_HARD_REGNO_MODE_OK is
-;; not a full solution either. In fact, even though IRA uses the macro
-;; TARGET_HARD_REGNO_MODE_OK to calculate which registers are prohibited
-;; from use in certain modes, it still can allocate an odd hard register
-;; for DImode values. This is due to how IRA populates the table
-;; ira_useful_class_mode_regs[][]. It suffers from the same problem
-;; as using a register class to describe this restriction. Namely, it
-;; sets both the odd and even part of an even register pair in the
-;; regset. Therefore IRA can and will allocate odd registers for
-;; DImode values on 32-bit.
-;;
-;; There are legitimate cases where DImode values can end up in odd
-;; hard registers, the most notable example is argument passing.
-;;
-;; What saves us is reload and the DImode splitters. Both are
-;; necessary. The odd register splitters cannot match if, for
-;; example, we have a non-offsetable MEM. Reload will notice this
-;; case and reload the address into a single hard register.
-;;
-;; The real downfall of this awkward register constraint is that it
-;; does not evaluate to a true register class like a bonafide use of
-;; define_register_constraint would. This means that we cannot use
-;; it with LRA, since the constraint processing of LRA really depends
-;; upon whether an extra constraint is for registers or not. It uses
-;; reg_class_for_constraint, and checks it against NO_REGS.
-(define_constraint "U"
- "Pseudo-register or hard even-numbered integer register"
- (and (match_code "reg")
- (ior (match_test "REGNO (op) < FIRST_PSEUDO_REGISTER")
- (not (match_test "reload_in_progress && reg_renumber [REGNO (op)] < 0")))
- (match_test "TARGET_ARCH32")
- (match_test "register_ok_for_ldd (op)")))
-
(define_memory_constraint "W"
"A memory with only a base register"
(match_operand 0 "mem_noofs_operand"))
diff --git a/gcc/config/sparc/freebsd.h b/gcc/config/sparc/freebsd.h
index 5396b32..ee2a210 100644
--- a/gcc/config/sparc/freebsd.h
+++ b/gcc/config/sparc/freebsd.h
@@ -55,7 +55,7 @@ along with GCC; see the file COPYING3. If not see
/************************[ Target stuff ]***********************************/
-/* Define the actual types of some ANSI-mandated types.
+/* Define the actual types of some ANSI-mandated types.
Needs to agree with <machine/ansi.h>. GCC defaults come from c-decl.cc,
c-common.cc, and config/<arch>/<arch>.h. */
@@ -111,7 +111,7 @@ along with GCC; see the file COPYING3. If not see
/* DWARF bits. */
-/* Follow Irix 6 and not the Dwarf2 draft in using 64-bit offsets.
+/* Follow Irix 6 and not the Dwarf2 draft in using 64-bit offsets.
Obviously the Dwarf2 folks havn't tried to actually build systems
with their spec. On a 64-bit system, only 64-bit relocs become
RELATIVE relocations. */
diff --git a/gcc/config/sparc/linux.h b/gcc/config/sparc/linux.h
index 8cc5389..538845e 100644
--- a/gcc/config/sparc/linux.h
+++ b/gcc/config/sparc/linux.h
@@ -56,13 +56,13 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#undef SIZE_TYPE
#define SIZE_TYPE "unsigned int"
-
+
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE "int"
-
+
#undef WCHAR_TYPE
#define WCHAR_TYPE "int"
-
+
#undef WCHAR_TYPE_SIZE
#define WCHAR_TYPE_SIZE 32
diff --git a/gcc/config/sparc/linux64.h b/gcc/config/sparc/linux64.h
index 63543f9..dc6b408 100644
--- a/gcc/config/sparc/linux64.h
+++ b/gcc/config/sparc/linux64.h
@@ -254,7 +254,7 @@ do { \
/* DWARF bits. */
-/* Follow Irix 6 and not the Dwarf2 draft in using 64-bit offsets.
+/* Follow Irix 6 and not the Dwarf2 draft in using 64-bit offsets.
Obviously the Dwarf2 folks haven't tried to actually build systems
with their spec. On a 64-bit system, only 64-bit relocs become
RELATIVE relocations. */
diff --git a/gcc/config/sparc/sparc-protos.h b/gcc/config/sparc/sparc-protos.h
index 399458a..bc30608 100644
--- a/gcc/config/sparc/sparc-protos.h
+++ b/gcc/config/sparc/sparc-protos.h
@@ -99,7 +99,7 @@ extern int register_ok_for_ldd (rtx);
extern int memory_ok_for_ldd (rtx);
extern int v9_regcmp_p (enum rtx_code);
/* Function used for V8+ code generation. Returns 1 if the high
- 32 bits of REG are 0 before INSN. */
+ 32 bits of REG are 0 before INSN. */
extern int sparc_check_64 (rtx, rtx_insn *);
extern rtx gen_df_reg (rtx, int);
extern void sparc_expand_compare_and_swap (rtx op[]);
diff --git a/gcc/config/sparc/sparc.cc b/gcc/config/sparc/sparc.cc
index 3a4c13a..3935a97 100644
--- a/gcc/config/sparc/sparc.cc
+++ b/gcc/config/sparc/sparc.cc
@@ -61,6 +61,7 @@ along with GCC; see the file COPYING3. If not see
#include "builtins.h"
#include "tree-vector-builder.h"
#include "opts.h"
+#include "dwarf2out.h"
/* This file should be included last. */
#include "target-def.h"
@@ -681,6 +682,9 @@ static rtx sparc_libcall_value (machine_mode, const_rtx);
static bool sparc_function_value_regno_p (const unsigned int);
static unsigned HOST_WIDE_INT sparc_asan_shadow_offset (void);
static void sparc_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
+static bool sparc_output_cfi_directive (FILE *, dw_cfi_ref);
+static bool sparc_dw_cfi_oprnd1_desc (dwarf_call_frame_info,
+ dw_cfi_oprnd_type &);
static void sparc_file_end (void);
static bool sparc_frame_pointer_required (void);
static bool sparc_can_eliminate (const int, const int);
@@ -693,7 +697,6 @@ static const char *sparc_mangle_type (const_tree);
static void sparc_trampoline_init (rtx, tree, rtx);
static machine_mode sparc_preferred_simd_mode (scalar_mode);
static reg_class_t sparc_preferred_reload_class (rtx x, reg_class_t rclass);
-static bool sparc_lra_p (void);
static bool sparc_print_operand_punct_valid_p (unsigned char);
static void sparc_print_operand (FILE *, rtx, int);
static void sparc_print_operand_address (FILE *, machine_mode, rtx);
@@ -878,6 +881,12 @@ char sparc_hard_reg_printed[8];
#define TARGET_ASM_OUTPUT_DWARF_DTPREL sparc_output_dwarf_dtprel
#endif
+#undef TARGET_OUTPUT_CFI_DIRECTIVE
+#define TARGET_OUTPUT_CFI_DIRECTIVE sparc_output_cfi_directive
+
+#undef TARGET_DW_CFI_OPRND1_DESC
+#define TARGET_DW_CFI_OPRND1_DESC sparc_dw_cfi_oprnd1_desc
+
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END sparc_file_end
@@ -911,9 +920,6 @@ char sparc_hard_reg_printed[8];
#define TARGET_MANGLE_TYPE sparc_mangle_type
#endif
-#undef TARGET_LRA_P
-#define TARGET_LRA_P sparc_lra_p
-
#undef TARGET_LEGITIMATE_ADDRESS_P
#define TARGET_LEGITIMATE_ADDRESS_P sparc_legitimate_address_p
@@ -1947,10 +1953,6 @@ sparc_option_override (void)
if (TARGET_ARCH32)
target_flags &= ~MASK_STACK_BIAS;
- /* Use LRA instead of reload, unless otherwise instructed. */
- if (!(target_flags_explicit & MASK_LRA))
- target_flags |= MASK_LRA;
-
/* Enable applicable errata workarounds for LEON3FT. */
if (sparc_fix_ut699 || sparc_fix_ut700 || sparc_fix_gr712rc)
{
@@ -2168,7 +2170,7 @@ sparc_option_override (void)
|| sparc_cpu == PROCESSOR_M8)
? 128 : (sparc_cpu == PROCESSOR_NIAGARA7
? 256 : 512)));
-
+
/* Disable save slot sharing for call-clobbered registers by default.
The IRA sharing algorithm works on single registers only and this
@@ -10142,7 +10144,7 @@ supersparc_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
if (insn_type == TYPE_IALU || insn_type == TYPE_SHIFT)
return 0;
}
-
+
return cost;
}
@@ -10384,7 +10386,7 @@ sparc_branch_cost (bool speed_p, bool predictable_p)
return cost;
}
}
-
+
static int
set_extends (rtx_insn *insn)
{
@@ -11006,7 +11008,7 @@ enum sparc_builtins
SPARC_BUILTIN_FPCMPUR16SHL,
SPARC_BUILTIN_FPCMPUR32SHL,
SPARC_BUILTIN_LAST_FPCMPSHL = SPARC_BUILTIN_FPCMPUR32SHL,
-
+
SPARC_BUILTIN_MAX
};
@@ -11553,7 +11555,7 @@ sparc_vis_init_builtins (void)
def_builtin_const ("__builtin_vis_fpcmpugt32", CODE_FOR_fpcmpugt32si_vis,
SPARC_BUILTIN_FPCMPUGT32, di_ftype_v2si_v2si);
}
-
+
def_builtin_const ("__builtin_vis_fpmax8", CODE_FOR_maxv8qi3,
SPARC_BUILTIN_FPMAX8, v8qi_ftype_v8qi_v8qi);
def_builtin_const ("__builtin_vis_fpmax16", CODE_FOR_maxv4hi3,
@@ -11608,7 +11610,7 @@ sparc_vis_init_builtins (void)
tree di_ftype_v2si_v2si_si = build_function_type_list (intDI_type_node,
v2si, v2si,
intSI_type_node, 0);
-
+
def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8dishl,
SPARC_BUILTIN_FPCMPLE8SHL, di_ftype_v8qi_v8qi_si);
def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8dishl,
@@ -11678,7 +11680,7 @@ sparc_vis_init_builtins (void)
tree si_ftype_v2si_v2si_si = build_function_type_list (intSI_type_node,
v2si, v2si,
intSI_type_node, 0);
-
+
def_builtin_const ("__builtin_vis_fpcmple8shl", CODE_FOR_fpcmple8sishl,
SPARC_BUILTIN_FPCMPLE8SHL, si_ftype_v8qi_v8qi_si);
def_builtin_const ("__builtin_vis_fpcmpgt8shl", CODE_FOR_fpcmpgt8sishl,
@@ -12621,6 +12623,31 @@ sparc_output_dwarf_dtprel (FILE *file, int size, rtx x)
fputs (")", file);
}
+/* Implement TARGET_OUTPUT_CFI_DIRECTIVE. */
+static bool
+sparc_output_cfi_directive (FILE *f, dw_cfi_ref cfi)
+{
+ if (cfi->dw_cfi_opc == DW_CFA_GNU_window_save)
+ {
+ fprintf (f, "\t.cfi_window_save\n");
+ return true;
+ }
+ return false;
+}
+
+/* Implement TARGET_DW_CFI_OPRND1_DESC. */
+static bool
+sparc_dw_cfi_oprnd1_desc (dwarf_call_frame_info cfi_opc,
+ dw_cfi_oprnd_type &oprnd_type)
+{
+ if (cfi_opc == DW_CFA_GNU_window_save)
+ {
+ oprnd_type = dw_cfi_oprnd_unused;
+ return true;
+ }
+ return false;
+}
+
/* Do whatever processing is required at the end of a file. */
static void
@@ -12995,7 +13022,7 @@ sparc_expand_vec_perm_bmask (machine_mode vmode, rtx sel)
t_1 = force_reg (SImode, GEN_INT (0x01010101));
/* sel = { A*2, A*2+1, B*2, B*2+1, ... } */
break;
-
+
case E_V8QImode:
/* input = xAxBxCxDxExFxGxH */
sel = expand_simple_binop (DImode, AND, sel,
@@ -13251,14 +13278,6 @@ sparc_preferred_reload_class (rtx x, reg_class_t rclass)
return rclass;
}
-/* Return true if we use LRA instead of reload pass. */
-
-static bool
-sparc_lra_p (void)
-{
- return TARGET_LRA;
-}
-
/* Output a wide multiply instruction in V8+ mode. INSN is the instruction,
OPERANDS are its operands and OPCODE is the mnemonic to be used. */
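
(Editorial aside, not part of the patch: earlier in this sparc.cc diff two DWARF hooks are added so the backend, rather than generic code, decides how DW_CFA_GNU_window_save is printed and describes its absent operand. A simplified sketch of a consumer is below; everything except the sparc_* functions is hypothetical and it is not compilable outside dwarf2out's headers:

    /* Hypothetical caller: give the target the first chance to print a
       CFI row; sparc_output_cfi_directive prints "\t.cfi_window_save\n"
       and returns true for DW_CFA_GNU_window_save, false otherwise.  */
    static void
    emit_cfi_row_sketch (FILE *f, dw_cfi_ref cfi)
    {
      if (sparc_output_cfi_directive (f, cfi))
        return;
      /* ... otherwise fall back to the generic .cfi_* output ...  */
    }

sparc_dw_cfi_oprnd1_desc plays the matching role on the describe side, reporting that this opcode carries no first operand.)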
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 8612832..deb6c1c 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -429,7 +429,7 @@ along with GCC; see the file COPYING3. If not see
(MASK_FPU + MASK_HARD_QUAD + MASK_VIS + MASK_VIS2 + MASK_VIS3 \
+ MASK_VIS4 + MASK_CBCOND + MASK_FMAF + MASK_FSMULD \
+ MASK_POPC + MASK_SUBXC)
-
+
/* TARGET_HARD_MUL: Use 32-bit hardware multiply instructions but not %y. */
#define TARGET_HARD_MUL \
(TARGET_SPARCLITE || TARGET_SPARCLET \
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 7363079..9703a20 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -265,12 +265,8 @@
(define_attr "cpu_feature" "none,fpu,fpunotv9,v9,vis,vis3,vis4,vis4b"
(const_string "none"))
-(define_attr "lra" "disabled,enabled"
- (const_string "enabled"))
-
(define_attr "enabled" ""
- (cond [(eq_attr "cpu_feature" "none")
- (cond [(eq_attr "lra" "disabled") (symbol_ref "!TARGET_LRA")] (const_int 1))
+ (cond [(eq_attr "cpu_feature" "none") (const_int 1)
(eq_attr "cpu_feature" "fpu") (symbol_ref "TARGET_FPU")
(eq_attr "cpu_feature" "fpunotv9") (symbol_ref "TARGET_FPU && !TARGET_V9")
(eq_attr "cpu_feature" "v9") (symbol_ref "TARGET_V9")
@@ -1835,9 +1831,9 @@
(define_insn "*movdi_insn_sp32"
[(set (match_operand:DI 0 "nonimmediate_operand"
- "=T,o,U,T,r,o,r,r,?*f, T,?*f, o,?*e,?*e, r,?*f,?*e, T,*b,*b")
+ "=T,o,r,o,r,r,?*f, T,?*f, o,?*e,?*e, r,?*f,?*e, T,*b,*b")
(match_operand:DI 1 "input_operand"
- " J,J,T,U,o,r,i,r, T,?*f, o,?*f, *e, *e,?*f, r, T,?*e, J, P"))]
+ " J,J,o,r,i,r, T,?*f, o,?*f, *e, *e,?*f, r, T,?*e, J, P"))]
"TARGET_ARCH32
&& (register_operand (operands[0], DImode)
|| register_or_zero_operand (operands[1], DImode))"
@@ -1846,8 +1842,6 @@
#
ldd\t%1, %0
std\t%1, %0
- ldd\t%1, %0
- std\t%1, %0
#
#
ldd\t%1, %0
@@ -1862,13 +1856,11 @@
std\t%1, %0
fzero\t%0
fone\t%0"
- [(set_attr "type" "store,*,load,store,load,store,*,*,fpload,fpstore,*,*,fpmove,*,*,*,fpload,fpstore,visl,
-visl")
- (set_attr "subtype" "*,*,regular,*,regular,*,*,*,*,*,*,*,*,*,*,*,*,*,double,double")
- (set_attr "length" "*,2,*,*,*,*,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
- (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
- (set_attr "cpu_feature" "v9,*,*,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")
- (set_attr "lra" "*,*,disabled,disabled,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*,*")])
+ [(set_attr "type" "store,*,load,store,*,*,fpload,fpstore,*,*,fpmove,*,*,*,fpload,fpstore,visl,visl")
+ (set_attr "subtype" "*,*,regular,*,*,*,*,*,*,*,*,*,*,*,*,*,double,double")
+ (set_attr "length" "*,2,*,*,2,2,*,*,2,2,*,2,2,2,*,*,*,*")
+ (set_attr "fptype" "*,*,*,*,*,*,*,*,*,*,double,*,*,*,*,*,double,double")
+ (set_attr "cpu_feature" "v9,*,*,*,*,*,fpu,fpu,fpu,fpu,v9,fpunotv9,vis3,vis3,fpu,fpu,vis,vis")])
(define_insn "*movdi_insn_sp64"
[(set (match_operand:DI 0 "nonimmediate_operand" "=r,r,r, m, r,*e,?*e,?*e, m,b,b")
@@ -2468,9 +2460,9 @@ visl")
(define_insn "*movdf_insn_sp32"
[(set (match_operand:DF 0 "nonimmediate_operand"
- "=T,o,b,b,e,e,*r, f, e,T,U,T, f,o, *r,*r, o")
+ "=T,o,b,b,e,e,*r, f, e,T, f,o, *r,*r, o")
(match_operand:DF 1 "input_operand"
- " G,G,G,C,e,e, f,*r,T#F,e,T,U,o#F,f,*rF, o,*r"))]
+ " G,G,G,C,e,e, f,*r,T#F,e,o#F,f,*rF, o,*r"))]
"TARGET_ARCH32
&& (register_operand (operands[0], DFmode)
|| register_or_zero_or_all_ones_operand (operands[1], DFmode))"
@@ -2485,19 +2477,16 @@ visl")
#
ldd\t%1, %0
std\t%1, %0
- ldd\t%1, %0
- std\t%1, %0
#
#
#
ldd\t%1, %0
std\t%1, %0"
- [(set_attr "type" "store,*,visl,visl,fpmove,*,*,*,fpload,fpstore,load,store,*,*,*,load,store")
- (set_attr "subtype" "*,*,double,double,*,*,*,*,*,*,regular,*,*,*,*,regular,*")
- (set_attr "length" "*,2,*,*,*,2,2,2,*,*,*,*,2,2,2,*,*")
- (set_attr "fptype" "*,*,double,double,double,*,*,*,*,*,*,*,*,*,*,*,*")
- (set_attr "cpu_feature" "v9,*,vis,vis,v9,fpunotv9,vis3,vis3,fpu,fpu,*,*,fpu,fpu,*,*,*")
- (set_attr "lra" "*,*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
+ [(set_attr "type" "store,*,visl,visl,fpmove,*,*,*,fpload,fpstore,*,*,*,load,store")
+ (set_attr "subtype" "*,*,double,double,*,*,*,*,*,*,*,*,*,regular,*")
+ (set_attr "length" "*,2,*,*,*,2,2,2,*,*,2,2,2,*,*")
+ (set_attr "fptype" "*,*,double,double,double,*,*,*,*,*,*,*,*,*,*")
+ (set_attr "cpu_feature" "v9,*,vis,vis,v9,fpunotv9,vis3,vis3,fpu,fpu,fpu,fpu,*,*,*")])
(define_insn "*movdf_insn_sp64"
[(set (match_operand:DF 0 "nonimmediate_operand" "=b,b,e,*r, e, e,m, *r,*r, m,*r")
@@ -8499,9 +8488,9 @@ visl")
(define_insn "*mov<VM64:mode>_insn_sp32"
[(set (match_operand:VM64 0 "nonimmediate_operand"
- "=T,o,e,e,e,*r, f,e,T,U,T,f,o,*r,*r, o")
+ "=T,o,e,e,e,*r, f,e,T,f,o,*r,*r, o")
(match_operand:VM64 1 "input_operand"
- " Y,Y,Y,Z,e, f,*r,T,e,T,U,o,f,*r, o,*r"))]
+ " Y,Y,Y,Z,e, f,*r,T,e,o,f,*r, o,*r"))]
"TARGET_VIS
&& TARGET_ARCH32
&& (register_operand (operands[0], <VM64:MODE>mode)
@@ -8516,18 +8505,15 @@ visl")
#
ldd\t%1, %0
std\t%1, %0
- ldd\t%1, %0
- std\t%1, %0
#
#
#
ldd\t%1, %0
std\t%1, %0"
- [(set_attr "type" "store,*,visl,visl,vismv,*,*,fpload,fpstore,load,store,*,*,*,load,store")
- (set_attr "subtype" "*,*,double,double,double,*,*,*,*,regular,*,*,*,*,regular,*")
- (set_attr "length" "*,2,*,*,*,2,2,*,*,*,*,2,2,2,*,*")
- (set_attr "cpu_feature" "*,*,vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*,*,*")
- (set_attr "lra" "*,*,*,*,*,*,*,*,*,disabled,disabled,*,*,*,*,*")])
+ [(set_attr "type" "store,*,visl,visl,vismv,*,*,fpload,fpstore,*,*,*,load,store")
+ (set_attr "subtype" "*,*,double,double,double,*,*,*,*,*,*,*,regular,*")
+ (set_attr "length" "*,2,*,*,*,2,2,*,*,2,2,2,*,*")
+ (set_attr "cpu_feature" "*,*,vis,vis,vis,vis3,vis3,*,*,*,*,*,*,*")])
(define_split
[(set (match_operand:VM64 0 "register_operand" "")
diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt
index afede3f..235974c 100644
--- a/gcc/config/sparc/sparc.opt
+++ b/gcc/config/sparc/sparc.opt
@@ -57,10 +57,6 @@ msoft-quad-float
Target RejectNegative InverseMask(HARD_QUAD)
Do not use hardware quad fp instructions.
-mlra
-Target Mask(LRA)
-Enable Local Register Allocation.
-
mv8plus
Target Mask(V8PLUS)
Compile for V8+ ABI.
diff --git a/gcc/config/sparc/sparc.opt.urls b/gcc/config/sparc/sparc.opt.urls
index 24cc22e..2a6ffa2 100644
--- a/gcc/config/sparc/sparc.opt.urls
+++ b/gcc/config/sparc/sparc.opt.urls
@@ -24,9 +24,6 @@ UrlSuffix(gcc/SPARC-Options.html#index-mhard-quad-float)
msoft-quad-float
UrlSuffix(gcc/SPARC-Options.html#index-msoft-quad-float)
-mlra
-UrlSuffix(gcc/SPARC-Options.html#index-mlra-3)
-
mv8plus
UrlSuffix(gcc/SPARC-Options.html#index-mv8plus)
diff --git a/gcc/config/sparc/sysv4.h b/gcc/config/sparc/sysv4.h
index 391d9b1..fb43e3f 100644
--- a/gcc/config/sparc/sysv4.h
+++ b/gcc/config/sparc/sysv4.h
@@ -90,7 +90,7 @@ do { ASM_OUTPUT_ALIGN ((FILE), Pmode == SImode ? 2 : 3); \
#define FINI_SECTION_ASM_OP "\t.section\t\".fini\""
/* Define the pseudo-ops used to switch to the .ctors and .dtors sections.
-
+
Note that we want to give these sections the SHF_WRITE attribute
because these sections will actually contain data (i.e. tables of
addresses of functions in the current root executable or shared library
@@ -103,7 +103,7 @@ do { ASM_OUTPUT_ALIGN ((FILE), Pmode == SImode ? 2 : 3); \
use the `-z text' option when building a shared library, you will get
errors unless the .ctors and .dtors sections are marked as writable
via the SHF_WRITE attribute.) */
-
+
#undef CTORS_SECTION_ASM_OP
#define CTORS_SECTION_ASM_OP "\t.section\t\".ctors\",#alloc,#write"
#undef DTORS_SECTION_ASM_OP
diff --git a/gcc/config/stormy16/stormy16-protos.h b/gcc/config/stormy16/stormy16-protos.h
index 1d3a8cf..4396616 100644
--- a/gcc/config/stormy16/stormy16-protos.h
+++ b/gcc/config/stormy16/stormy16-protos.h
@@ -55,13 +55,13 @@ extern void xstormy16_expand_andqi3 (rtx *);
extern void xstormy16_split_cbranch (machine_mode, rtx, rtx, rtx);
extern int short_memory_operand (rtx, machine_mode);
extern bool nonimmediate_nonstack_operand (rtx, machine_mode);
-extern enum reg_class xstormy16_secondary_reload_class
+extern enum reg_class xstormy16_secondary_reload_class
(enum reg_class, machine_mode, rtx);
extern void xstormy16_split_move (machine_mode, rtx, rtx);
extern void xstormy16_expand_move (machine_mode, rtx, rtx);
-extern void xstormy16_expand_arith (machine_mode, enum rtx_code,
+extern void xstormy16_expand_arith (machine_mode, enum rtx_code,
rtx, rtx, rtx);
-extern const char * xstormy16_output_shift (machine_mode, enum rtx_code,
+extern const char * xstormy16_output_shift (machine_mode, enum rtx_code,
rtx, rtx, rtx);
extern bool xstormy16_below100_symbol (rtx, machine_mode);
extern bool xstormy16_splittable_below100_operand (rtx, machine_mode);
diff --git a/gcc/config/stormy16/stormy16.cc b/gcc/config/stormy16/stormy16.cc
index 1016913..d04af9a 100644
--- a/gcc/config/stormy16/stormy16.cc
+++ b/gcc/config/stormy16/stormy16.cc
@@ -150,7 +150,7 @@ xstormy16_rtx_costs (rtx x, machine_mode mode,
*total = COSTS_N_INSNS (speed_p ? 18 + 5 : 6);
else if (mode == SImode)
*total = COSTS_N_INSNS (speed_p ? 3 * 18 + 14 : 17);
- else
+ else
*total = COSTS_N_INSNS (speed_p ? 18 + 3 : 4);
return false;
diff --git a/gcc/config/stormy16/stormy16.h b/gcc/config/stormy16/stormy16.h
index 3d5e21d..dbcb897 100644
--- a/gcc/config/stormy16/stormy16.h
+++ b/gcc/config/stormy16/stormy16.h
@@ -292,7 +292,7 @@ enum reg_class
/* This declaration must be present, but it can be an abort if profiling is
not implemented. */
-
+
#define FUNCTION_PROFILER(FILE, LABELNO) xstormy16_function_profiler ()
diff --git a/gcc/config/v850/predicates.md b/gcc/config/v850/predicates.md
index 3e76bdaa..751cf72 100644
--- a/gcc/config/v850/predicates.md
+++ b/gcc/config/v850/predicates.md
@@ -182,7 +182,7 @@
*/
- for (i = 2; i < count - (TARGET_LONG_CALLS ? 2: 1); i++)
+ for (i = 2; i < count - (TARGET_LONG_CALLS ? 2 : 1); i++)
{
rtx dest;
rtx src;
diff --git a/gcc/config/v850/v850-c.cc b/gcc/config/v850/v850-c.cc
index bafd6d9..b808710 100644
--- a/gcc/config/v850/v850-c.cc
+++ b/gcc/config/v850/v850-c.cc
@@ -90,7 +90,7 @@ static void
mark_current_function_as_interrupt (void)
{
tree name;
-
+
if (current_function_decl == NULL_TREE)
{
warning (0, "cannot set interrupt attribute: no current function");
@@ -104,7 +104,7 @@ mark_current_function_as_interrupt (void)
warning (0, "cannot set interrupt attribute: no such identifier");
return;
}
-
+
decl_attributes (&current_function_decl,
tree_cons (name, NULL_TREE, NULL_TREE), 0);
}
@@ -125,9 +125,9 @@ ghs_pragma_section (cpp_reader * pfile ATTRIBUTE_UNUSED)
tree sect_ident;
const char *sect, *alias;
enum GHS_section_kind kind;
-
+
type = pragma_lex (&x);
-
+
if (type == CPP_EOF && !repeat)
goto reset;
else if (type == CPP_NAME)
@@ -138,20 +138,20 @@ ghs_pragma_section (cpp_reader * pfile ATTRIBUTE_UNUSED)
else
goto bad;
repeat = 0;
-
+
if (pragma_lex (&x) != CPP_EQ)
goto bad;
if (pragma_lex (&x) != CPP_NAME)
goto bad;
-
+
alias = IDENTIFIER_POINTER (x);
-
+
type = pragma_lex (&x);
if (type == CPP_COMMA)
repeat = 1;
else if (type != CPP_EOF)
warning (OPT_Wpragmas, "junk at end of %<#pragma%> ghs section");
-
+
if (streq (sect, "data")) kind = GHS_SECTION_KIND_DATA;
else if (streq (sect, "text")) kind = GHS_SECTION_KIND_TEXT;
else if (streq (sect, "rodata")) kind = GHS_SECTION_KIND_RODATA;
@@ -170,7 +170,7 @@ ghs_pragma_section (cpp_reader * pfile ATTRIBUTE_UNUSED)
warning (0, "unrecognized section name %qE", sect_ident);
return;
}
-
+
if (streq (alias, "default"))
GHS_current_section_names [kind] = NULL;
else
@@ -188,7 +188,7 @@ ghs_pragma_section (cpp_reader * pfile ATTRIBUTE_UNUSED)
/* #pragma ghs section \n: Reset all section names back to their defaults. */
{
int i;
-
+
for (i = COUNT_OF_GHS_SECTION_KINDS; i--;)
GHS_current_section_names [i] = NULL;
}
@@ -198,10 +198,10 @@ void
ghs_pragma_interrupt (cpp_reader * pfile ATTRIBUTE_UNUSED)
{
tree x;
-
+
if (pragma_lex (&x) != CPP_EOF)
warning (OPT_Wpragmas, "junk at end of %<#pragma%> ghs interrupt");
-
+
mark_current_function_as_interrupt ();
}
@@ -209,10 +209,10 @@ void
ghs_pragma_starttda (cpp_reader * pfile ATTRIBUTE_UNUSED)
{
tree x;
-
+
if (pragma_lex (&x) != CPP_EOF)
warning (OPT_Wpragmas, "junk at end of %<#pragma%> ghs starttda");
-
+
push_data_area (DATA_AREA_TDA);
}
@@ -220,10 +220,10 @@ void
ghs_pragma_startsda (cpp_reader * pfile ATTRIBUTE_UNUSED)
{
tree x;
-
+
if (pragma_lex (&x) != CPP_EOF)
warning (OPT_Wpragmas, "junk at end of %<#pragma%> ghs startsda");
-
+
push_data_area (DATA_AREA_SDA);
}
@@ -231,10 +231,10 @@ void
ghs_pragma_startzda (cpp_reader * pfile ATTRIBUTE_UNUSED)
{
tree x;
-
+
if (pragma_lex (&x) != CPP_EOF)
warning (OPT_Wpragmas, "junk at end of %<#pragma%> ghs startzda");
-
+
push_data_area (DATA_AREA_ZDA);
}
@@ -242,10 +242,10 @@ void
ghs_pragma_endtda (cpp_reader * pfile ATTRIBUTE_UNUSED)
{
tree x;
-
+
if (pragma_lex (&x) != CPP_EOF)
warning (OPT_Wpragmas, "junk at end of %<#pragma%> ghs endtda");
-
+
pop_data_area (DATA_AREA_TDA);
}
@@ -253,10 +253,10 @@ void
ghs_pragma_endsda (cpp_reader * pfile ATTRIBUTE_UNUSED)
{
tree x;
-
+
if (pragma_lex (&x) != CPP_EOF)
warning (OPT_Wpragmas, "junk at end of %<#pragma%> ghs endsda");
-
+
pop_data_area (DATA_AREA_SDA);
}
@@ -264,9 +264,9 @@ void
ghs_pragma_endzda (cpp_reader * pfile ATTRIBUTE_UNUSED)
{
tree x;
-
+
if (pragma_lex (&x) != CPP_EOF)
warning (OPT_Wpragmas, "junk at end of %<#pragma%> ghs endzda");
-
+
pop_data_area (DATA_AREA_ZDA);
}
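The handlers reformatted above implement the GHS-compatibility pragmas for V850. A hedged usage sketch of the syntax that ghs_pragma_section and ghs_pragma_interrupt parse follows; the section aliases, variable and function names are invented for illustration, and the exact placement of "#pragma ghs interrupt" inside the function body is inferred from the current_function_decl check in mark_current_function_as_interrupt:

/* Hypothetical V850 source using the GHS pragmas parsed above.  */

/* Rename the "data" and "rodata" section kinds; each entry is
   kind=identifier, entries may be comma-separated, and the special
   alias "default" restores a single kind.  */
#pragma ghs section data=my_data, rodata=my_const

int counter;            /* placed via the "data" kind    */
const int limit = 10;   /* placed via the "rodata" kind  */

/* A bare "#pragma ghs section" resets every kind to its default name.  */
#pragma ghs section

void timer_isr (void)
{
#pragma ghs interrupt   /* marks the enclosing function as an interrupt handler */
  /* handler body */
}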
diff --git a/gcc/config/v850/v850.cc b/gcc/config/v850/v850.cc
index 35fa6b5..b39343c 100644
--- a/gcc/config/v850/v850.cc
+++ b/gcc/config/v850/v850.cc
@@ -60,7 +60,7 @@ static void v850_print_operand_address (FILE *, machine_mode, rtx);
const char * GHS_default_section_names [(int) COUNT_OF_GHS_SECTION_KINDS];
const char * GHS_current_section_names [(int) COUNT_OF_GHS_SECTION_KINDS];
-/* Track the current data area set by the data area pragma (which
+/* Track the current data area set by the data area pragma (which
can be nested). Tested by check_default_data_area. */
data_area_stack_element * data_area_stack = NULL;
@@ -193,7 +193,7 @@ v850_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
size = arg.promoted_size_in_bytes ();
if (size < 1)
size = 1;
-
+
if (!TARGET_GCC_ABI)
align = UNITS_PER_WORD;
else if (arg.type)
@@ -449,7 +449,7 @@ v850_print_operand (FILE * file, rtx x, int code)
case CONST_INT:
fprintf (file, "%d", (INTVAL (x) >= 0) ? 0 : -1);
break;
-
+
case CONST_DOUBLE:
const_double_split (x, &high, &low);
fprintf (file, "%ld", (long) high);
@@ -465,7 +465,7 @@ v850_print_operand (FILE * file, rtx x, int code)
case CONST_INT:
fprintf (file, "%ld", (long) INTVAL (x));
break;
-
+
case CONST_DOUBLE:
const_double_split (x, &high, &low);
fprintf (file, "%ld", (long) low);
@@ -483,12 +483,12 @@ v850_print_operand (FILE * file, rtx x, int code)
break;
case 'O':
gcc_assert (special_symbolref_operand (x, VOIDmode));
-
+
if (GET_CODE (x) == CONST)
x = XEXP (XEXP (x, 0), 0);
else
gcc_assert (GET_CODE (x) == SYMBOL_REF);
-
+
if (SYMBOL_REF_ZDA_P (x))
fprintf (file, "zdaoff");
else if (SYMBOL_REF_SDA_P (x))
@@ -504,12 +504,12 @@ v850_print_operand (FILE * file, rtx x, int code)
break;
case 'Q':
gcc_assert (special_symbolref_operand (x, VOIDmode));
-
+
if (GET_CODE (x) == CONST)
x = XEXP (XEXP (x, 0), 0);
else
gcc_assert (GET_CODE (x) == SYMBOL_REF);
-
+
if (SYMBOL_REF_ZDA_P (x))
fprintf (file, "r0");
else if (SYMBOL_REF_SDA_P (x))
@@ -534,7 +534,7 @@ v850_print_operand (FILE * file, rtx x, int code)
fprintf (file, "[r0]");
}
break;
-
+
case CONST_INT:
{
unsigned HOST_WIDE_INT v = INTVAL (x);
@@ -542,7 +542,7 @@ v850_print_operand (FILE * file, rtx x, int code)
/* Trickery to avoid problems with shifting
32-bits at a time on a 32-bit host. */
v = v >> 16;
- v = v >> 16;
+ v = v >> 16;
fprintf (file, HOST_WIDE_INT_PRINT_HEX, v);
break;
}
@@ -622,7 +622,7 @@ v850_print_operand (FILE * file, rtx x, int code)
case CONST_DOUBLE:
fprintf (file, HOST_WIDE_INT_PRINT_HEX, CONST_DOUBLE_LOW (x));
break;
-
+
case CONST_INT:
case SYMBOL_REF:
case CONST:
@@ -756,7 +756,7 @@ v850_print_operand_punct_valid_p (unsigned char code)
output_addr_const will normally barf at this, but it is OK to omit
the truncate and just emit the difference of the two labels. The
.hword directive will automatically handle the truncation for us.
-
+
Returns true if rtx was handled, false otherwise. */
static bool
@@ -852,7 +852,7 @@ output_move_single (rtx * operands)
|| GET_CODE (src) == SYMBOL_REF
|| GET_CODE (src) == CONST)
{
- if (TARGET_V850E_UP)
+ if (TARGET_V850E_UP)
return "mov hilo(%1),%0";
else
return "movhi hi(%1),%.,%0\n\tmovea lo(%1),%0,%0";
@@ -1018,7 +1018,7 @@ ep_memory_offset (machine_mode mode, int unsignedp ATTRIBUTE_UNUSED)
case E_SFmode:
max_offset = (1 << 8);
break;
-
+
default:
break;
}
@@ -1472,7 +1472,7 @@ compute_register_save_size (long * p_reg_saved)
registers that need to be saved. To detect this we note that the
helper functions always push at least register r29 (provided
that the function is not an interrupt handler). */
-
+
if (TARGET_PROLOG_FUNCTION
&& (i == 2 || ((i >= 20) && (i < 30))))
{
@@ -1510,7 +1510,7 @@ compute_register_save_size (long * p_reg_saved)
}
}
}
-
+
if (p_reg_saved)
*p_reg_saved = reg_saved;
@@ -1640,7 +1640,7 @@ expand_prologue (void)
emit_insn (gen_save_interrupt ());
actual_fsize -= INTERRUPT_FIXED_SAVE_SIZE;
-
+
if (((1L << LINK_POINTER_REGNUM) & reg_saved) != 0)
actual_fsize -= INTERRUPT_ALL_SAVE_SIZE;
@@ -1724,7 +1724,7 @@ expand_prologue (void)
rtx insn = emit_insn (save_all);
INSN_CODE (insn) = code;
actual_fsize -= alloc_stack;
-
+
}
else
save_all = NULL_RTX;
@@ -1753,13 +1753,13 @@ expand_prologue (void)
init_stack_alloc = compute_register_save_size (NULL);
else
init_stack_alloc = actual_fsize;
-
+
/* Save registers at the beginning of the stack frame. */
offset = init_stack_alloc - 4;
-
+
if (init_stack_alloc)
increment_stack (- (signed) init_stack_alloc, true);
-
+
/* Save the return pointer first. */
if (num_save > 0 && REGNO (save_regs[num_save-1]) == LINK_POINTER_REGNUM)
{
@@ -1770,7 +1770,7 @@ expand_prologue (void)
save_regs[--num_save]));
offset -= 4;
}
-
+
for (i = 0; i < num_save; i++)
{
F (emit_move_insn (gen_rtx_MEM (SImode,
@@ -1865,7 +1865,7 @@ expand_epilogue (void)
}
code = recog (restore_all, NULL, NULL);
-
+
if (code >= 0)
{
rtx insn;
@@ -1967,10 +1967,10 @@ v850_get_data_area (tree decl)
{
if (lookup_attribute ("sda", DECL_ATTRIBUTES (decl)) != NULL_TREE)
return DATA_AREA_SDA;
-
+
if (lookup_attribute ("tda", DECL_ATTRIBUTES (decl)) != NULL_TREE)
return DATA_AREA_TDA;
-
+
if (lookup_attribute ("zda", DECL_ATTRIBUTES (decl)) != NULL_TREE)
return DATA_AREA_ZDA;
@@ -1983,7 +1983,7 @@ static void
v850_set_data_area (tree decl, v850_data_area data_area)
{
tree name;
-
+
switch (data_area)
{
case DATA_AREA_SDA: name = get_identifier ("sda"); break;
@@ -2036,7 +2036,7 @@ v850_handle_data_area_attribute (tree *node, tree name,
data_area = DATA_AREA_ZDA;
else
gcc_unreachable ();
-
+
switch (TREE_CODE (decl))
{
case VAR_DECL:
@@ -2059,7 +2059,7 @@ v850_handle_data_area_attribute (tree *node, tree name,
*no_add_attrs = true;
}
break;
-
+
default:
break;
}
@@ -2113,7 +2113,7 @@ v850_encode_data_area (tree decl, rtx symbol)
if (DECL_SECTION_NAME (decl))
{
const char *name = DECL_SECTION_NAME (decl);
-
+
if (streq (name, ".zdata") || streq (name, ".zbss"))
v850_set_data_area (decl, DATA_AREA_ZDA);
@@ -2140,7 +2140,7 @@ v850_encode_data_area (tree decl, rtx symbol)
else if (size <= small_memory_max [(int) SMALL_MEMORY_ZDA])
v850_set_data_area (decl, DATA_AREA_ZDA);
}
-
+
if (v850_get_data_area (decl) == DATA_AREA_NORMAL)
return;
}
@@ -2182,7 +2182,7 @@ construct_restore_jr (rtx op)
unsigned long int last;
int i;
static char buff [256]; /* XXX */
-
+
if (count <= 2)
{
error ("bogus JR construction: %d", count);
@@ -2194,7 +2194,7 @@ construct_restore_jr (rtx op)
gcc_assert (GET_CODE (XVECEXP (op, 0, 1)) == SET);
gcc_assert (GET_CODE (SET_SRC (XVECEXP (op, 0, 1))) == PLUS);
gcc_assert (GET_CODE (XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1)) == CONST_INT);
-
+
stack_bytes = INTVAL (XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1));
/* Each pop will remove 4 bytes from the stack.... */
@@ -2212,12 +2212,12 @@ construct_restore_jr (rtx op)
for (i = 2; i < count; i++)
{
rtx vector_element = XVECEXP (op, 0, i);
-
+
gcc_assert (GET_CODE (vector_element) == SET);
gcc_assert (GET_CODE (SET_DEST (vector_element)) == REG);
gcc_assert (register_is_ok_for_epilogue (SET_DEST (vector_element),
SImode));
-
+
mask |= 1 << REGNO (SET_DEST (vector_element));
}
@@ -2239,7 +2239,7 @@ construct_restore_jr (rtx op)
{
gcc_assert (!stack_bytes);
gcc_assert (mask & (1 << 29));
-
+
last = 29;
}
@@ -2247,16 +2247,16 @@ construct_restore_jr (rtx op)
We ignore this here, and generate a JR anyway. We will
be popping more registers than is strictly necessary, but
it does save code space. */
-
+
if (TARGET_LONG_CALLS)
{
char name[40];
-
+
if (first == last)
sprintf (name, "__return_%s", reg_names [first]);
else
sprintf (name, "__return_%s_%s", reg_names [first], reg_names [last]);
-
+
sprintf (buff, "movhi hi(%s), r0, r6\n\tmovea lo(%s), r6, r6\n\tjmp r6",
name, name);
}
@@ -2267,7 +2267,7 @@ construct_restore_jr (rtx op)
else
sprintf (buff, "jr __return_%s_%s", reg_names [first], reg_names [last]);
}
-
+
return buff;
}
@@ -2287,8 +2287,8 @@ construct_save_jarl (rtx op)
unsigned long int last;
int i;
static char buff [255]; /* XXX */
-
- if (count <= (TARGET_LONG_CALLS ? 3 : 2))
+
+ if (count <= (TARGET_LONG_CALLS ? 3 : 2))
{
error ("bogus JARL construction: %d", count);
return NULL;
@@ -2299,7 +2299,7 @@ construct_save_jarl (rtx op)
gcc_assert (GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) == PLUS);
gcc_assert (GET_CODE (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 0)) == REG);
gcc_assert (GET_CODE (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 1)) == CONST_INT);
-
+
/* Work out how many bytes to push onto the stack after storing the
registers. */
stack_bytes = INTVAL (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 1));
@@ -2319,16 +2319,16 @@ construct_save_jarl (rtx op)
for (i = 1; i < count - (TARGET_LONG_CALLS ? 3 : 2); i++)
{
rtx vector_element = XVECEXP (op, 0, i);
-
+
gcc_assert (GET_CODE (vector_element) == SET);
gcc_assert (GET_CODE (SET_SRC (vector_element)) == REG);
gcc_assert (register_is_ok_for_epilogue (SET_SRC (vector_element),
SImode));
-
+
mask |= 1 << REGNO (SET_SRC (vector_element));
}
- /* Scan for the first register to push. */
+ /* Scan for the first register to push. */
for (first = 0; first < 32; first++)
{
if (mask & (1 << first))
@@ -2346,7 +2346,7 @@ construct_save_jarl (rtx op)
{
gcc_assert (!stack_bytes);
gcc_assert (mask & (1 << 29));
-
+
last = 29;
}
@@ -2354,16 +2354,16 @@ construct_save_jarl (rtx op)
We ignore this here, and generate a JARL anyway. We will
be pushing more registers than is strictly necessary, but
it does save code space. */
-
+
if (TARGET_LONG_CALLS)
{
char name[40];
-
+
if (first == last)
sprintf (name, "__save_%s", reg_names [first]);
else
sprintf (name, "__save_%s_%s", reg_names [first], reg_names [last]);
-
+
if (TARGET_V850E3V5_UP)
sprintf (buff, "mov hilo(%s), r11\n\tjarl [r11], r10", name);
else
@@ -2404,12 +2404,12 @@ v850_output_aligned_bss (FILE * file,
case DATA_AREA_TDA:
switch_to_section (tdata_section);
break;
-
+
default:
switch_to_section (bss_section);
break;
}
-
+
ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
#ifdef ASM_DECLARE_OBJECT_NAME
last_assemble_variable_decl = decl;
@@ -2448,13 +2448,13 @@ v850_output_common (FILE * file,
case DATA_AREA_TDA:
fprintf (file, "%s", TCOMMON_ASM_OP);
break;
-
+
default:
fprintf (file, "%s", COMMON_ASM_OP);
break;
}
}
-
+
assemble_name (file, name);
fprintf (file, ",%u,%u\n", size, align / BITS_PER_UNIT);
}
@@ -2470,7 +2470,7 @@ v850_output_local (FILE * file,
fprintf (file, "%s", LOCAL_ASM_OP);
assemble_name (file, name);
fprintf (file, "\n");
-
+
ASM_OUTPUT_ALIGNED_DECL_COMMON (file, decl, name, size, align);
}
@@ -2488,7 +2488,7 @@ v850_insert_attributes (tree decl, tree * attr_ptr ATTRIBUTE_UNUSED )
/* Initialize the default names of the v850 specific sections,
if this has not been done before. */
-
+
if (GHS_default_section_names [(int) GHS_SECTION_KIND_SDATA] == NULL)
{
GHS_default_section_names [(int) GHS_SECTION_KIND_SDATA]
@@ -2499,14 +2499,14 @@ v850_insert_attributes (tree decl, tree * attr_ptr ATTRIBUTE_UNUSED )
GHS_default_section_names [(int) GHS_SECTION_KIND_TDATA]
= ".tdata";
-
+
GHS_default_section_names [(int) GHS_SECTION_KIND_ZDATA]
= ".zdata";
GHS_default_section_names [(int) GHS_SECTION_KIND_ROZDATA]
= ".rozdata";
}
-
+
if (current_function_decl == NULL_TREE
&& (VAR_P (decl)
|| TREE_CODE (decl) == CONST_DECL
@@ -2526,23 +2526,23 @@ v850_insert_attributes (tree decl, tree * attr_ptr ATTRIBUTE_UNUSED )
{
default:
gcc_unreachable ();
-
+
case DATA_AREA_SDA:
kind = ((TREE_READONLY (decl))
? GHS_SECTION_KIND_ROSDATA
: GHS_SECTION_KIND_SDATA);
break;
-
+
case DATA_AREA_TDA:
kind = GHS_SECTION_KIND_TDATA;
break;
-
+
case DATA_AREA_ZDA:
kind = ((TREE_READONLY (decl))
? GHS_SECTION_KIND_ROZDATA
: GHS_SECTION_KIND_ZDATA);
break;
-
+
case DATA_AREA_NORMAL: /* default data area */
if (TREE_READONLY (decl))
kind = GHS_SECTION_KIND_RODATA;
@@ -2585,7 +2585,7 @@ construct_dispose_instruction (rtx op)
int i;
static char buff[ 120 ]; /* XXX */
int use_callt = 0;
-
+
if (count <= 2)
{
error ("bogus DISPOSE construction: %d", count);
@@ -2597,7 +2597,7 @@ construct_dispose_instruction (rtx op)
gcc_assert (GET_CODE (XVECEXP (op, 0, 1)) == SET);
gcc_assert (GET_CODE (SET_SRC (XVECEXP (op, 0, 1))) == PLUS);
gcc_assert (GET_CODE (XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1)) == CONST_INT);
-
+
stack_bytes = INTVAL (XEXP (SET_SRC (XVECEXP (op, 0, 1)), 1));
/* Each pop will remove 4 bytes from the stack.... */
@@ -2617,7 +2617,7 @@ construct_dispose_instruction (rtx op)
for (i = 2; i < count; i++)
{
rtx vector_element = XVECEXP (op, 0, i);
-
+
gcc_assert (GET_CODE (vector_element) == SET);
gcc_assert (GET_CODE (SET_DEST (vector_element)) == REG);
gcc_assert (register_is_ok_for_epilogue (SET_DEST (vector_element),
@@ -2642,7 +2642,7 @@ construct_dispose_instruction (rtx op)
for (i = 20; i < 32; i++)
if (mask & (1 << i))
break;
-
+
if (i == 31)
sprintf (buff, "callt ctoff(__callt_return_r31c)");
else
@@ -2654,31 +2654,31 @@ construct_dispose_instruction (rtx op)
{
static char regs [100]; /* XXX */
int done_one;
-
+
/* Generate the DISPOSE instruction. Note we could just issue the
bit mask as a number as the assembler can cope with this, but for
the sake of our readers we turn it into a textual description. */
regs[0] = 0;
done_one = 0;
-
+
for (i = 20; i < 32; i++)
{
if (mask & (1 << i))
{
int first;
-
+
if (done_one)
strcat (regs, ", ");
else
done_one = 1;
-
+
first = i;
strcat (regs, reg_names[ first ]);
-
+
for (i++; i < 32; i++)
if ((mask & (1 << i)) == 0)
break;
-
+
if (i > first + 1)
{
strcat (regs, " - ");
@@ -2686,10 +2686,10 @@ construct_dispose_instruction (rtx op)
}
}
}
-
+
sprintf (buff, "dispose %d {%s}, r31", stack_bytes / 4, regs);
}
-
+
return buff;
}
@@ -2706,7 +2706,7 @@ construct_prepare_instruction (rtx op)
int i;
static char buff[ 120 ]; /* XXX */
int use_callt = 0;
-
+
if (XVECLEN (op, 0) <= 1)
{
error ("bogus PREPEARE construction: %d", XVECLEN (op, 0));
@@ -2718,7 +2718,7 @@ construct_prepare_instruction (rtx op)
gcc_assert (GET_CODE (XVECEXP (op, 0, 0)) == SET);
gcc_assert (GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) == PLUS);
gcc_assert (GET_CODE (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 1)) == CONST_INT);
-
+
stack_bytes = INTVAL (XEXP (SET_SRC (XVECEXP (op, 0, 0)), 1));
@@ -2736,10 +2736,10 @@ construct_prepare_instruction (rtx op)
for (i = 1; i < XVECLEN (op, 0); i++)
{
rtx vector_element = XVECEXP (op, 0, i);
-
+
if (GET_CODE (vector_element) == CLOBBER)
continue;
-
+
gcc_assert (GET_CODE (vector_element) == SET);
gcc_assert (GET_CODE (SET_SRC (vector_element)) == REG);
gcc_assert (register_is_ok_for_epilogue (SET_SRC (vector_element),
@@ -2762,7 +2762,7 @@ construct_prepare_instruction (rtx op)
sprintf (buff, "callt ctoff(__callt_save_r2_r%d)", (mask & (1 << 31)) ? 31 : 29 );
return buff;
}
-
+
for (i = 20; i < 32; i++)
if (mask & (1 << i))
break;
@@ -2778,31 +2778,31 @@ construct_prepare_instruction (rtx op)
static char regs [100]; /* XXX */
int done_one;
-
+
/* Generate the PREPARE instruction. Note we could just issue the
bit mask as a number as the assembler can cope with this, but for
- the sake of our readers we turn it into a textual description. */
+ the sake of our readers we turn it into a textual description. */
regs[0] = 0;
done_one = 0;
-
+
for (i = 20; i < 32; i++)
{
if (mask & (1 << i))
{
int first;
-
+
if (done_one)
strcat (regs, ", ");
else
done_one = 1;
-
+
first = i;
strcat (regs, reg_names[ first ]);
-
+
for (i++; i < 32; i++)
if ((mask & (1 << i)) == 0)
break;
-
+
if (i > first + 1)
{
strcat (regs, " - ");
@@ -2810,10 +2810,10 @@ construct_prepare_instruction (rtx op)
}
}
}
-
+
sprintf (buff, "prepare {%s}, %d", regs, (- stack_bytes) / 4);
}
-
+
return buff;
}
@@ -2914,7 +2914,7 @@ v850_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
/* Worker function for TARGET_FUNCTION_VALUE. */
static rtx
-v850_function_value (const_tree valtype,
+v850_function_value (const_tree valtype,
const_tree fn_decl_or_type ATTRIBUTE_UNUSED,
bool outgoing ATTRIBUTE_UNUSED)
{
@@ -3064,7 +3064,7 @@ v850_legitimate_address_p (machine_mode mode, rtx x, bool strict_p,
+ (GET_MODE_NUNITS (mode) * UNITS_PER_WORD))))
return true;
- return false;
+ return false;
}
static int
@@ -3176,7 +3176,7 @@ v850_gen_movdi (rtx * operands)
if (REGNO (operands[1]) & 1)
/* Use two store word instructions to synthesise a store double. */
return "st.w %1, %0 ; st.w %R1, %R0 ";
-
+
return "st.dw %1, %0";
}
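Several of the v850.cc routines touched above (v850_get_data_area, v850_handle_data_area_attribute, v850_encode_data_area) key off the sda/tda/zda variable attributes. A small illustrative snippet, with invented variable names, of how those attributes appear in user code:

/* Hypothetical V850 translation unit; the attributes below are the ones
   looked up by v850_get_data_area and validated by
   v850_handle_data_area_attribute.  */
int big_table[256];                     /* default (normal) data area       */
int hot_counter __attribute__ ((sda));  /* small data area (.sdata/.sbss)   */
int tiny_value  __attribute__ ((tda));  /* tiny data area (.tdata)          */
int zero_flag   __attribute__ ((zda));  /* zero data area (.zdata/.zbss)    */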
diff --git a/gcc/config/v850/v850.h b/gcc/config/v850/v850.h
index 2b31dd1..2721ea0 100644
--- a/gcc/config/v850/v850.h
+++ b/gcc/config/v850/v850.h
@@ -63,7 +63,7 @@
#if TARGET_CPU_DEFAULT == TARGET_CPU_v850e1
#undef MASK_DEFAULT
-#define MASK_DEFAULT MASK_V850E /* No practical difference. */
+#define MASK_DEFAULT MASK_V850E /* No practical difference. */
#undef SUBTARGET_ASM_SPEC
#define SUBTARGET_ASM_SPEC "%{!mv*:-mv850e1}"
#undef SUBTARGET_CPP_SPEC
@@ -72,7 +72,7 @@
#if TARGET_CPU_DEFAULT == TARGET_CPU_v850e2
#undef MASK_DEFAULT
-#define MASK_DEFAULT MASK_V850E2
+#define MASK_DEFAULT MASK_V850E2
#undef SUBTARGET_ASM_SPEC
#define SUBTARGET_ASM_SPEC "%{!mv*:-mv850e2}"
#undef SUBTARGET_CPP_SPEC
@@ -99,7 +99,7 @@
#define TARGET_VERSION fprintf (stderr, " (Renesas V850E3V5)");
#endif
-#define TARGET_V850E3V5_UP ((TARGET_V850E3V5))
+#define TARGET_V850E3V5_UP ((TARGET_V850E3V5))
#define TARGET_V850E2V3_UP ((TARGET_V850E2V3) || TARGET_V850E3V5_UP)
#define TARGET_V850E2_UP ((TARGET_V850E2) || TARGET_V850E2V3_UP)
#define TARGET_V850E_UP ((TARGET_V850E) || TARGET_V850E2_UP)
@@ -127,7 +127,7 @@
#define EXTRA_SPECS \
{ "subtarget_asm_spec", SUBTARGET_ASM_SPEC }, \
- { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }
+ { "subtarget_cpp_spec", SUBTARGET_CPP_SPEC }
/* Macro to decide when FPU instructions can be used. */
@@ -308,7 +308,7 @@
For any two classes, it is very desirable that there be another
class that represents their union. */
-
+
enum reg_class
{
NO_REGS, EVEN_REGS, GENERAL_REGS, ALL_REGS, LIM_REG_CLASSES
@@ -353,7 +353,7 @@ enum reg_class
Since they use reg_renumber, they are safe only once reg_renumber
has been allocated, which happens in reginfo.cc during register
allocation. */
-
+
#define REGNO_OK_FOR_BASE_P(regno) \
(((regno) < FIRST_PSEUDO_REGISTER \
&& (regno) != CC_REGNUM \
@@ -412,7 +412,7 @@ enum reg_class
/* Register containing return address from latest function call. */
#define LINK_POINTER_REGNUM LP_REGNUM
-
+
/* On some machines the offset between the frame pointer and starting
offset of the automatic variables is not known until after register
allocation has been done (for example, because the saved registers
@@ -432,7 +432,7 @@ enum reg_class
Do not define this macro if it would be the same as
`FRAME_POINTER_REGNUM'. */
-#undef HARD_FRAME_POINTER_REGNUM
+#undef HARD_FRAME_POINTER_REGNUM
#define HARD_FRAME_POINTER_REGNUM 29
/* Base register for access to arguments of the function. */
@@ -578,7 +578,7 @@ struct cum_arg { int nbytes; };
#define NO_FUNCTION_CSE 1
/* The four different data regions on the v850. */
-typedef enum
+typedef enum
{
DATA_AREA_NORMAL,
DATA_AREA_SDA,
@@ -617,7 +617,7 @@ typedef enum
#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
asm_output_aligned_bss ((FILE), (DECL), (NAME), (SIZE), (ALIGN))
-#undef ASM_OUTPUT_ALIGNED_BSS
+#undef ASM_OUTPUT_ALIGNED_BSS
#define ASM_OUTPUT_ALIGNED_BSS(FILE, DECL, NAME, SIZE, ALIGN) \
v850_output_aligned_bss (FILE, DECL, NAME, SIZE, ALIGN)
@@ -634,7 +634,7 @@ typedef enum
#undef ASM_OUTPUT_LOCAL
#define ASM_OUTPUT_ALIGNED_DECL_LOCAL(FILE, DECL, NAME, SIZE, ALIGN) \
v850_output_local (FILE, DECL, NAME, SIZE, ALIGN)
-
+
/* Globalizing directive for a label. */
#define GLOBAL_ASM_OP "\t.global "
@@ -767,26 +767,26 @@ typedef enum
can appear in the "ghs section" pragma. These names are used to index
into the GHS_default_section_names[] and GHS_current_section_names[]
that are defined in v850.cc, and so the ordering of each must remain
- consistent.
+ consistent.
- These arrays give the default and current names for each kind of
+ These arrays give the default and current names for each kind of
section defined by the GHS pragmas. The current names can be changed
- by the "ghs section" pragma. If the current names are null, use
+ by the "ghs section" pragma. If the current names are null, use
the default names. Note that the two arrays have different types.
For the *normal* section kinds (like .data, .text, etc.) we do not
want to explicitly force the name of these sections, but would rather
- let the linker (or at least the back end) choose the name of the
+ let the linker (or at least the back end) choose the name of the
section, UNLESS the user has forced a specific name for these section
kinds. To accomplish this set the name in ghs_default_section_names
to null. */
enum GHS_section_kind
-{
+{
GHS_SECTION_KIND_DEFAULT,
GHS_SECTION_KIND_TEXT,
- GHS_SECTION_KIND_DATA,
+ GHS_SECTION_KIND_DATA,
GHS_SECTION_KIND_RODATA,
GHS_SECTION_KIND_BSS,
GHS_SECTION_KIND_SDATA,
diff --git a/gcc/config/vax/vax.opt.urls b/gcc/config/vax/vax.opt.urls
index 10bee25..7813b88 100644
--- a/gcc/config/vax/vax.opt.urls
+++ b/gcc/config/vax/vax.opt.urls
@@ -19,5 +19,5 @@ munix
UrlSuffix(gcc/VAX-Options.html#index-munix)
mlra
-UrlSuffix(gcc/VAX-Options.html#index-mlra-4)
+UrlSuffix(gcc/VAX-Options.html#index-mlra-3)
diff --git a/gcc/config/visium/visium.h b/gcc/config/visium/visium.h
index afeb68f..c5cce1d 100644
--- a/gcc/config/visium/visium.h
+++ b/gcc/config/visium/visium.h
@@ -119,7 +119,7 @@
data area approach is no longer used, these pointers are no longer
supported.
- The macro and function pointers are described below.
+ The macro and function pointers are described below.
INIT_EXPANDERS:
@@ -1015,7 +1015,7 @@ struct visium_args
A difficulty is setting the correct instruction parity at run time.
- TRAMPOLINE_SIZE
+ TRAMPOLINE_SIZE
A C expression for the size in bytes of the trampoline, as an integer. */
#define TRAMPOLINE_SIZE (visium_cpu == PROCESSOR_GR6 ? 24 : 20)
diff --git a/gcc/config/vms/vms-c.cc b/gcc/config/vms/vms-c.cc
index d0620b4..0e9fa2f 100644
--- a/gcc/config/vms/vms-c.cc
+++ b/gcc/config/vms/vms-c.cc
@@ -20,6 +20,7 @@ along with GCC; see the file COPYING3. If not see
#define IN_TARGET_CODE 1
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/vx-common.h b/gcc/config/vx-common.h
index d727e85..98636e2 100644
--- a/gcc/config/vx-common.h
+++ b/gcc/config/vx-common.h
@@ -1,4 +1,4 @@
-/* Target-independent configuration for VxWorks and VxWorks AE.
+/* Target-independent configuration for VxWorks and VxWorks AE.
Copyright (C) 2005-2024 Free Software Foundation, Inc.
Contributed by CodeSourcery, LLC.
diff --git a/gcc/config/vxworks.cc b/gcc/config/vxworks.cc
index fab92d3..80577e2 100644
--- a/gcc/config/vxworks.cc
+++ b/gcc/config/vxworks.cc
@@ -72,9 +72,9 @@ static tree
vxworks_emutls_var_fields (tree type, tree *name)
{
tree field, next_field;
-
+
*name = get_identifier ("__tls_var");
-
+
field = build_decl (BUILTINS_LOCATION, FIELD_DECL,
get_identifier ("size"), unsigned_type_node);
DECL_CONTEXT (field) = type;
@@ -106,23 +106,23 @@ vxworks_emutls_var_init (tree var, tree decl, tree tmpl_addr)
{
vec<constructor_elt, va_gc> *v;
vec_alloc (v, 3);
-
+
tree type = TREE_TYPE (var);
tree field = TYPE_FIELDS (type);
-
+
constructor_elt elt = {field, fold_convert (TREE_TYPE (field), tmpl_addr)};
v->quick_push (elt);
-
+
field = DECL_CHAIN (field);
elt.index = field;
elt.value = build_int_cst (TREE_TYPE (field), 0);
v->quick_push (elt);
-
+
field = DECL_CHAIN (field);
elt.index = field;
elt.value = fold_convert (TREE_TYPE (field), DECL_SIZE_UNIT (decl));
v->quick_push (elt);
-
+
return build_constructor (type, v);
}
@@ -155,7 +155,7 @@ vxworks_override_options (void)
the toolchain user is expected to provide whatever linker level glue is
required to get things to operate properly. */
- targetm.have_ctors_dtors =
+ targetm.have_ctors_dtors =
TARGET_VXWORKS_HAVE_CTORS_DTORS || HAVE_INITFINI_ARRAY_SUPPORT;
/* PIC is only supported for RTPs. flags_pic might be < 0 here, in
diff --git a/gcc/config/vxworksae.h b/gcc/config/vxworksae.h
index b95f22d..2114928 100644
--- a/gcc/config/vxworksae.h
+++ b/gcc/config/vxworksae.h
@@ -45,7 +45,7 @@ along with GCC; see the file COPYING3. If not see
#undef VXWORKS_LINK_SPEC
#define VXWORKS_LINK_SPEC \
"-r %{v:-V}"
-
+
#undef VXWORKS_LIBGCC_SPEC
#define VXWORKS_LIBGCC_SPEC \
"-lgcc"
diff --git a/gcc/config/xtensa/constraints.md b/gcc/config/xtensa/constraints.md
index d855fb8..c96959e 100644
--- a/gcc/config/xtensa/constraints.md
+++ b/gcc/config/xtensa/constraints.md
@@ -32,7 +32,7 @@
General-purpose AR registers for indirect sibling calls, @code{a2}-
@code{a8}.")
-(define_register_constraint "d" "TARGET_DENSITY ? AR_REGS: NO_REGS"
+(define_register_constraint "d" "TARGET_DENSITY ? AR_REGS : NO_REGS"
"@internal
All AR registers, including sp, but only if the Xtensa Code Density
Option is configured.")
@@ -53,7 +53,7 @@
General-purpose AR registers, but only if the Xtensa Sign Extend
Option is configured.")
-(define_register_constraint "C" "TARGET_MUL16 ? GR_REGS: NO_REGS"
+(define_register_constraint "C" "TARGET_MUL16 ? GR_REGS : NO_REGS"
"@internal
General-purpose AR registers, but only if the Xtensa 16-Bit Integer
Multiply Option is configured.")
@@ -63,7 +63,7 @@
General-purpose AR registers, but only if the Xtensa Code Density
Option is configured.")
-(define_register_constraint "W" "TARGET_CONST16 ? GR_REGS: NO_REGS"
+(define_register_constraint "W" "TARGET_CONST16 ? GR_REGS : NO_REGS"
"@internal
General-purpose AR registers, but only if the Xtensa Const16
Option is configured.")
diff --git a/gcc/config/xtensa/xtensa-dynconfig.cc b/gcc/config/xtensa/xtensa-dynconfig.cc
index 3bd2760..6ddc02a 100644
--- a/gcc/config/xtensa/xtensa-dynconfig.cc
+++ b/gcc/config/xtensa/xtensa-dynconfig.cc
@@ -17,6 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define INCLUDE_MEMORY
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/xtensa/xtensa.cc b/gcc/config/xtensa/xtensa.cc
index 43b1332..d279382 100644
--- a/gcc/config/xtensa/xtensa.cc
+++ b/gcc/config/xtensa/xtensa.cc
@@ -363,6 +363,9 @@ static rtx xtensa_delegitimize_address (rtx);
#undef TARGET_MAX_ANCHOR_OFFSET
#define TARGET_MAX_ANCHOR_OFFSET 1020
+#undef TARGET_DIFFERENT_ADDR_DISPLACEMENT_P
+#define TARGET_DIFFERENT_ADDR_DISPLACEMENT_P hook_bool_void_true
+
struct gcc_target targetm = TARGET_INITIALIZER;
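The new xtensa.cc lines override the different_addr_displacement_p target hook with hook_bool_void_true, GCC's stock always-true hook from gcc/hooks.cc, telling LRA that the maximal legitimate displacement of an address can vary with context on Xtensa. Below is a simplified, self-contained model of what the #undef/#define pair does to the target vtable; gcc_target_model and targetm_model are invented names, since the real targetm is assembled by TARGET_INITIALIZER from the TARGET_* macros:

/* Illustrative model only.  */
#include <cstdio>

static bool hook_bool_void_false () { return false; }  /* default hook      */
static bool hook_bool_void_true ()  { return true;  }  /* new Xtensa choice */

struct gcc_target_model
{
  bool (*different_addr_displacement_p) ();
};

static gcc_target_model targetm_model = { hook_bool_void_true };

int main ()
{
  std::printf ("different_addr_displacement_p () = %d\n",
               targetm_model.different_addr_displacement_p ());
  return 0;
}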
diff --git a/gcc/config/xtensa/xtensa.md b/gcc/config/xtensa/xtensa.md
index f19e1fd..2c08c7d6 100644
--- a/gcc/config/xtensa/xtensa.md
+++ b/gcc/config/xtensa/xtensa.md
@@ -1279,15 +1279,13 @@
})
(define_insn "movsi_internal"
- [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,a,U,D,R,R,a,q,a,a,W,a,*a,*A")
- (match_operand:SI 1 "move_operand" "M,D,d,U,r,R,D,d,r,r,I,Y,i,T,*A,*r"))]
+ [(set (match_operand:SI 0 "nonimmed_operand" "=D,D,D,D,R,R,a,q,a,a,W,a,a,U,*a,*A")
+ (match_operand:SI 1 "move_operand" "M,D,d,R,D,d,r,r,I,Y,i,T,U,r,*A,*r"))]
"xtensa_valid_move (SImode, operands)"
"@
movi.n\t%0, %x1
mov.n\t%0, %1
mov.n\t%0, %1
- %v1l32i\t%0, %1
- %v0s32i\t%1, %0
%v1l32i.n\t%0, %1
%v0s32i.n\t%1, %0
%v0s32i.n\t%1, %0
@@ -1297,11 +1295,13 @@
movi\t%0, %1
const16\t%0, %t1\;const16\t%0, %b1
%v1l32r\t%0, %1
+ %v1l32i\t%0, %1
+ %v0s32i\t%1, %0
rsr\t%0, ACCLO
wsr\t%1, ACCLO"
- [(set_attr "type" "move,move,move,load,store,load,store,store,move,move,move,move,move,load,rsr,wsr")
+ [(set_attr "type" "move,move,move,load,store,store,move,move,move,move,move,load,load,store,rsr,wsr")
(set_attr "mode" "SI")
- (set_attr "length" "2,2,2,3,3,2,2,2,3,3,3,3,6,3,3,3")])
+ (set_attr "length" "2,2,2,2,2,2,3,3,3,3,6,3,3,3,3,3")])
(define_split
[(set (match_operand:SHI 0 "register_operand")